diff --git a/llvm/test/CodeGen/SPIRV/FOrdGreaterThanEqual_int.ll b/llvm/test/CodeGen/SPIRV/FOrdGreaterThanEqual_int.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/FOrdGreaterThanEqual_int.ll @@ -0,0 +1,14 @@ +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV + +; CHECK-SPIRV: %[[#result:]] = OpFOrdGreaterThanEqual %[[#]] %[[#]] %[[#]] +; CHECK-SPIRV: %[[#]] = OpSelect %[[#]] %[[#result]] %[[#]] %[[#]] + +;; LLVM IR was generated with -cl-std=c++ option + +define spir_kernel void @test(float %op1, float %op2) { +entry: + %call = call spir_func i32 @_Z14isgreaterequalff(float %op1, float %op2) + ret void +} + +declare spir_func i32 @_Z14isgreaterequalff(float, float) diff --git a/llvm/test/CodeGen/SPIRV/capability-Int64Atomics-store.ll b/llvm/test/CodeGen/SPIRV/capability-Int64Atomics-store.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/capability-Int64Atomics-store.ll @@ -0,0 +1,19 @@ +;; OpenCL C source: +;; #pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable +;; #pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable +;; +;; void foo (volatile atomic_long *object, long desired) { +;; atomic_store(object, desired); +;; } + +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s + +; CHECK: OpCapability Int64Atomics + +define spir_func void @foo(i64 addrspace(4)* %object, i64 %desired) { +entry: + tail call spir_func void @_Z12atomic_storePVU3AS4U7_Atomicll(i64 addrspace(4)* %object, i64 %desired) + ret void +} + +declare spir_func void @_Z12atomic_storePVU3AS4U7_Atomicll(i64 addrspace(4)*, i64) diff --git a/llvm/test/CodeGen/SPIRV/capability-Int64Atomics.ll b/llvm/test/CodeGen/SPIRV/capability-Int64Atomics.ll --- a/llvm/test/CodeGen/SPIRV/capability-Int64Atomics.ll +++ b/llvm/test/CodeGen/SPIRV/capability-Int64Atomics.ll @@ -1,10 +1,10 @@ -; OpenCL C source: -; #pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable -; #pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable -; -; void foo (volatile atomic_long *object, long desired) { -; atomic_fetch_xor(object, desired); -;} +;; OpenCL C source: +;; #pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable +;; #pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable +;; +;; void foo (volatile atomic_long *object, long desired) { +;; atomic_fetch_xor(object, desired); +;; } ; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s diff --git a/llvm/test/CodeGen/SPIRV/empty-module.ll b/llvm/test/CodeGen/SPIRV/empty-module.ll --- a/llvm/test/CodeGen/SPIRV/empty-module.ll +++ b/llvm/test/CodeGen/SPIRV/empty-module.ll @@ -3,6 +3,6 @@ ; CHECK-DAG: OpCapability Addresses ; CHECK-DAG: OpCapability Linkage ; CHECK-DAG: OpCapability Kernel -; CHECK: %1 = OpExtInstImport "OpenCL.std" -; CHECK: OpMemoryModel Physical64 OpenCL -; CHECK: OpSource Unknown 0 +; CHECK: %1 = OpExtInstImport "OpenCL.std" +; CHECK: OpMemoryModel Physical64 OpenCL +; CHECK: OpSource Unknown 0 diff --git a/llvm/test/CodeGen/SPIRV/relationals.ll b/llvm/test/CodeGen/SPIRV/relationals.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/relationals.ll @@ -0,0 +1,43 @@ +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV + +declare dso_local spir_func <4 x i8> @_Z13__spirv_IsNanIDv4_aDv4_fET_T0_(<4 x float>) +declare dso_local spir_func <4 x i8> @_Z13__spirv_IsInfIDv4_aDv4_fET_T0_(<4 x float>) +declare dso_local spir_func <4 x i8> 
@_Z16__spirv_IsFiniteIDv4_aDv4_fET_T0_(<4 x float>) +declare dso_local spir_func <4 x i8> @_Z16__spirv_IsNormalIDv4_aDv4_fET_T0_(<4 x float>) +declare dso_local spir_func <4 x i8> @_Z18__spirv_SignBitSetIDv4_aDv4_fET_T0_(<4 x float>) + +; CHECK-SPIRV: %[[#TBool:]] = OpTypeBool +; CHECK-SPIRV: %[[#TBoolVec:]] = OpTypeVector %[[#TBool]] + +define spir_kernel void @k() { +entry: + %arg1 = alloca <4 x float>, align 16 + %ret = alloca <4 x i8>, align 4 + %0 = load <4 x float>, <4 x float>* %arg1, align 16 + %call1 = call spir_func <4 x i8> @_Z13__spirv_IsNanIDv4_aDv4_fET_T0_(<4 x float> %0) +; CHECK-SPIRV: %[[#IsNanRes:]] = OpIsNan %[[#TBoolVec]] +; CHECK-SPIRV: %[[#SelectRes:]] = OpSelect %[[#]] %[[#IsNanRes]] +; CHECK-SPIRV: OpStore %[[#]] %[[#SelectRes]] + store <4 x i8> %call1, <4 x i8>* %ret, align 4 + %call2 = call spir_func <4 x i8> @_Z13__spirv_IsInfIDv4_aDv4_fET_T0_(<4 x float> %0) +; CHECK-SPIRV: %[[#IsInfRes:]] = OpIsInf %[[#TBoolVec]] +; CHECK-SPIRV: %[[#Select1Res:]] = OpSelect %[[#]] %[[#IsInfRes]] +; CHECK-SPIRV: OpStore %[[#]] %[[#Select1Res]] + store <4 x i8> %call2, <4 x i8>* %ret, align 4 + %call3 = call spir_func <4 x i8> @_Z16__spirv_IsFiniteIDv4_aDv4_fET_T0_(<4 x float> %0) +; CHECK-SPIRV: %[[#IsFiniteRes:]] = OpIsFinite %[[#TBoolVec]] +; CHECK-SPIRV: %[[#Select2Res:]] = OpSelect %[[#]] %[[#IsFiniteRes]] +; CHECK-SPIRV: OpStore %[[#]] %[[#Select2Res]] + store <4 x i8> %call3, <4 x i8>* %ret, align 4 + %call4 = call spir_func <4 x i8> @_Z16__spirv_IsNormalIDv4_aDv4_fET_T0_(<4 x float> %0) +; CHECK-SPIRV: %[[#IsNormalRes:]] = OpIsNormal %[[#TBoolVec]] +; CHECK-SPIRV: %[[#Select3Res:]] = OpSelect %[[#]] %[[#IsNormalRes]] +; CHECK-SPIRV: OpStore %[[#]] %[[#Select3Res]] + store <4 x i8> %call4, <4 x i8>* %ret, align 4 + %call5 = call spir_func <4 x i8> @_Z18__spirv_SignBitSetIDv4_aDv4_fET_T0_(<4 x float> %0) +; CHECK-SPIRV: %[[#SignBitSetRes:]] = OpSignBitSet %[[#TBoolVec]] +; CHECK-SPIRV: %[[#Select4Res:]] = OpSelect %[[#]] %[[#SignBitSetRes]] +; CHECK-SPIRV: OpStore %[[#]] %[[#Select4Res]] + store <4 x i8> %call5, <4 x i8>* %ret, align 4 + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/select.ll b/llvm/test/CodeGen/SPIRV/select.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/select.ll @@ -0,0 +1,14 @@ +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV + +; CHECK-SPIRV: OpSelect + +;; LLVM IR was generated with -cl-std=c++ option + +define spir_kernel void @test(i32 %op1, i32 %op2) { +entry: + %0 = trunc i8 undef to i1 + %call = call spir_func i32 @_Z14__spirv_Selectbii(i1 zeroext %0, i32 %op1, i32 %op2) + ret void +} + +declare spir_func i32 @_Z14__spirv_Selectbii(i1 zeroext, i32, i32) diff --git a/llvm/test/CodeGen/SPIRV/simple.ll b/llvm/test/CodeGen/SPIRV/simple.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/simple.ll @@ -0,0 +1,120 @@ +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s + +;; Support of doubles is required. 
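+;; (The fun02 kernel below works on double addrspace(1)* arguments; those
+;; double-typed loads and stores are what make the backend emit the Float64
+;; capability checked here.)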
+; CHECK: OpCapability Float64 +; CHECK: "fun01" +define spir_kernel void @fun01(i32 addrspace(1)* noalias %a, i32 addrspace(1)* %b, i32 %c) { +entry: + %a.addr = alloca i32 addrspace(1)*, align 8 + %b.addr = alloca i32 addrspace(1)*, align 8 + %c.addr = alloca i32, align 4 + store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 8 + store i32 addrspace(1)* %b, i32 addrspace(1)** %b.addr, align 8 + store i32 %c, i32* %c.addr, align 4 + %0 = load i32 addrspace(1)*, i32 addrspace(1)** %b.addr, align 8 + %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 0 + %1 = load i32, i32 addrspace(1)* %arrayidx, align 4 + %2 = load i32 addrspace(1)*, i32 addrspace(1)** %a.addr, align 8 + %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %2, i64 0 + store i32 %1, i32 addrspace(1)* %arrayidx1, align 4 + %3 = load i32 addrspace(1)*, i32 addrspace(1)** %b.addr, align 8 + %cmp = icmp ugt i32 addrspace(1)* %3, null + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %4 = load i32 addrspace(1)*, i32 addrspace(1)** %a.addr, align 8 + %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %4, i64 0 + store i32 2, i32 addrspace(1)* %arrayidx2, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +; CHECK: "fun02" +define spir_kernel void @fun02(double addrspace(1)* %a, double addrspace(1)* %b, i32 %c) { +entry: + %a.addr = alloca double addrspace(1)*, align 8 + %b.addr = alloca double addrspace(1)*, align 8 + %c.addr = alloca i32, align 4 + store double addrspace(1)* %a, double addrspace(1)** %a.addr, align 8 + store double addrspace(1)* %b, double addrspace(1)** %b.addr, align 8 + store i32 %c, i32* %c.addr, align 4 + %0 = load i32, i32* %c.addr, align 4 + %idxprom = sext i32 %0 to i64 + %1 = load double addrspace(1)*, double addrspace(1)** %b.addr, align 8 + %arrayidx = getelementptr inbounds double, double addrspace(1)* %1, i64 %idxprom + %2 = load double, double addrspace(1)* %arrayidx, align 8 + %3 = load i32, i32* %c.addr, align 4 + %idxprom1 = sext i32 %3 to i64 + %4 = load double addrspace(1)*, double addrspace(1)** %a.addr, align 8 + %arrayidx2 = getelementptr inbounds double, double addrspace(1)* %4, i64 %idxprom1 + store double %2, double addrspace(1)* %arrayidx2, align 8 + ret void +} + +; CHECK: "test_builtin" +define spir_func void @test_builtin(i32 addrspace(1)* %in, i32 addrspace(1)* %out) { +entry: + %in.addr = alloca i32 addrspace(1)*, align 8 + %out.addr = alloca i32 addrspace(1)*, align 8 + %n = alloca i32, align 4 + store i32 addrspace(1)* %in, i32 addrspace(1)** %in.addr, align 8 + store i32 addrspace(1)* %out, i32 addrspace(1)** %out.addr, align 8 + %call = call spir_func i64 @_Z13get_global_idj(i32 0) + %conv = trunc i64 %call to i32 + store i32 %conv, i32* %n, align 4 + %0 = load i32, i32* %n, align 4 + %idxprom = sext i32 %0 to i64 + %1 = load i32 addrspace(1)*, i32 addrspace(1)** %in.addr, align 8 + %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %1, i64 %idxprom + %2 = load i32, i32 addrspace(1)* %arrayidx, align 4 + %call1 = call spir_func i32 @_Z3absi(i32 %2) + %3 = load i32, i32* %n, align 4 + %idxprom2 = sext i32 %3 to i64 + %4 = load i32 addrspace(1)*, i32 addrspace(1)** %out.addr, align 8 + %arrayidx3 = getelementptr inbounds i32, i32 addrspace(1)* %4, i64 %idxprom2 + store i32 %call1, i32 addrspace(1)* %arrayidx3, align 4 + ret void +} + +; CHECK-NOT: "_Z13get_global_idj" +declare spir_func i64 @_Z13get_global_idj(i32) + +; CHECK-NOT: "_Z3absi" +declare spir_func i32 @_Z3absi(i32) + 
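+;; A hedged OpenCL C sketch of source that could produce the two functions
+;; below (not part of the original test; names follow the IR):
+;;   int myabs(int x) { return abs(x); }
+;;   void test_function_call(global int *in, global int *out) {
+;;     int n = get_global_id(0);
+;;     out[n] = myabs(in[n]);
+;;   }
+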
+; CHECK: "myabs" +define spir_func i32 @myabs(i32 %x) { +entry: + %x.addr = alloca i32, align 4 + store i32 %x, i32* %x.addr, align 4 + %0 = load i32, i32* %x.addr, align 4 + %call = call spir_func i32 @_Z3absi(i32 %0) + ret i32 %call +} + +; CHECK: "test_function_call" +define spir_func void @test_function_call(i32 addrspace(1)* %in, i32 addrspace(1)* %out) { +entry: + %in.addr = alloca i32 addrspace(1)*, align 8 + %out.addr = alloca i32 addrspace(1)*, align 8 + %n = alloca i32, align 4 + store i32 addrspace(1)* %in, i32 addrspace(1)** %in.addr, align 8 + store i32 addrspace(1)* %out, i32 addrspace(1)** %out.addr, align 8 + %call = call spir_func i64 @_Z13get_global_idj(i32 0) + %conv = trunc i64 %call to i32 + store i32 %conv, i32* %n, align 4 + %0 = load i32, i32* %n, align 4 + %idxprom = sext i32 %0 to i64 + %1 = load i32 addrspace(1)*, i32 addrspace(1)** %in.addr, align 8 + %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %1, i64 %idxprom + %2 = load i32, i32 addrspace(1)* %arrayidx, align 4 + %call1 = call spir_func i32 @myabs(i32 %2) + %3 = load i32, i32* %n, align 4 + %idxprom2 = sext i32 %3 to i64 + %4 = load i32 addrspace(1)*, i32 addrspace(1)** %out.addr, align 8 + %arrayidx3 = getelementptr inbounds i32, i32 addrspace(1)* %4, i64 %idxprom2 + store i32 %call1, i32 addrspace(1)* %arrayidx3, align 4 + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/spirv-tools-dis.ll b/llvm/test/CodeGen/SPIRV/spirv-tools-dis.ll --- a/llvm/test/CodeGen/SPIRV/spirv-tools-dis.ll +++ b/llvm/test/CodeGen/SPIRV/spirv-tools-dis.ll @@ -1,7 +1,7 @@ ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s -; CHECK: %{{[0-9]+}} = OpExtInstImport "OpenCL.std" -; CHECK: %{{[0-9]+}} = OpTypeInt 32 0 +; CHECK: %[[#]] = OpExtInstImport "OpenCL.std" +; CHECK: %[[#]] = OpTypeInt 32 0 define spir_kernel void @foo(i32 addrspace(1)* %a) { entry: diff --git a/llvm/test/CodeGen/SPIRV/transcoding/AtomicCompareExchangeExplicit_cl20.ll b/llvm/test/CodeGen/SPIRV/transcoding/AtomicCompareExchangeExplicit_cl20.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/transcoding/AtomicCompareExchangeExplicit_cl20.ll @@ -0,0 +1,59 @@ +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV + +;; __kernel void testAtomicCompareExchangeExplicit_cl20( +;; volatile global atomic_int* object, +;; global int* expected, +;; int desired) +;; { + ;; Values of memory order and memory scope arguments correspond to SPIR-2.0 spec. 
+;; atomic_compare_exchange_strong_explicit(object, expected, desired, +;; memory_order_release, // 3 +;; memory_order_relaxed // 0 +;; ); // by default, assume device scope = 2 +;; atomic_compare_exchange_strong_explicit(object, expected, desired, +;; memory_order_acq_rel, // 4 +;; memory_order_relaxed, // 0 +;; memory_scope_work_group // 1 +;; ); +;; atomic_compare_exchange_weak_explicit(object, expected, desired, +;; memory_order_release, // 3 +;; memory_order_relaxed // 0 +;; ); // by default, assume device scope = 2 +;; atomic_compare_exchange_weak_explicit(object, expected, desired, +;; memory_order_acq_rel, // 4 +;; memory_order_relaxed, // 0 +;; memory_scope_work_group // 1 +;; ); +;; } + +; CHECK-SPIRV: %[[#int:]] = OpTypeInt 32 0 +;; Constants below correspond to the SPIR-V spec +; CHECK-SPIRV-DAG: %[[#DeviceScope:]] = OpConstant %[[#int]] 1 +; CHECK-SPIRV-DAG: %[[#WorkgroupScope:]] = OpConstant %[[#int]] 2 +; CHECK-SPIRV-DAG: %[[#ReleaseMemSem:]] = OpConstant %[[#int]] 4 +; CHECK-SPIRV-DAG: %[[#RelaxedMemSem:]] = OpConstant %[[#int]] 0 +; CHECK-SPIRV-DAG: %[[#AcqRelMemSem:]] = OpConstant %[[#int]] 8 + +; CHECK-SPIRV: %[[#]] = OpAtomicCompareExchange %[[#]] %[[#]] %[[#DeviceScope]] %[[#ReleaseMemSem]] %[[#RelaxedMemSem]] +; CHECK-SPIRV: %[[#]] = OpAtomicCompareExchange %[[#]] %[[#]] %[[#WorkgroupScope]] %[[#AcqRelMemSem]] %[[#RelaxedMemSem]] +; CHECK-SPIRV: %[[#]] = OpAtomicCompareExchangeWeak %[[#]] %[[#]] %[[#DeviceScope]] %[[#ReleaseMemSem]] %[[#RelaxedMemSem]] +; CHECK-SPIRV: %[[#]] = OpAtomicCompareExchangeWeak %[[#]] %[[#]] %[[#WorkgroupScope]] %[[#AcqRelMemSem]] %[[#RelaxedMemSem]] + +define dso_local spir_kernel void @testAtomicCompareExchangeExplicit_cl20(i32 addrspace(1)* noundef %object, i32 addrspace(1)* noundef %expected, i32 noundef %desired) local_unnamed_addr { +entry: + %0 = addrspacecast i32 addrspace(1)* %object to i32 addrspace(4)* + %1 = addrspacecast i32 addrspace(1)* %expected to i32 addrspace(4)* + %call = call spir_func zeroext i1 @_Z39atomic_compare_exchange_strong_explicitPU3AS4VU7_AtomiciPU3AS4ii12memory_orderS4_(i32 addrspace(4)* noundef %0, i32 addrspace(4)* noundef %1, i32 noundef %desired, i32 noundef 3, i32 noundef 0) + %call1 = call spir_func zeroext i1 @_Z39atomic_compare_exchange_strong_explicitPU3AS4VU7_AtomiciPU3AS4ii12memory_orderS4_12memory_scope(i32 addrspace(4)* noundef %0, i32 addrspace(4)* noundef %1, i32 noundef %desired, i32 noundef 4, i32 noundef 0, i32 noundef 1) + %call2 = call spir_func zeroext i1 @_Z37atomic_compare_exchange_weak_explicitPU3AS4VU7_AtomiciPU3AS4ii12memory_orderS4_(i32 addrspace(4)* noundef %0, i32 addrspace(4)* noundef %1, i32 noundef %desired, i32 noundef 3, i32 noundef 0) + %call3 = call spir_func zeroext i1 @_Z37atomic_compare_exchange_weak_explicitPU3AS4VU7_AtomiciPU3AS4ii12memory_orderS4_12memory_scope(i32 addrspace(4)* noundef %0, i32 addrspace(4)* noundef %1, i32 noundef %desired, i32 noundef 4, i32 noundef 0, i32 noundef 1) + ret void +} + +declare spir_func zeroext i1 @_Z39atomic_compare_exchange_strong_explicitPU3AS4VU7_AtomiciPU3AS4ii12memory_orderS4_(i32 addrspace(4)* noundef, i32 addrspace(4)* noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr + +declare spir_func zeroext i1 @_Z39atomic_compare_exchange_strong_explicitPU3AS4VU7_AtomiciPU3AS4ii12memory_orderS4_12memory_scope(i32 addrspace(4)* noundef, i32 addrspace(4)* noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr + +declare spir_func zeroext i1 
@_Z37atomic_compare_exchange_weak_explicitPU3AS4VU7_AtomiciPU3AS4ii12memory_orderS4_(i32 addrspace(4)* noundef, i32 addrspace(4)* noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr
+
+declare spir_func zeroext i1 @_Z37atomic_compare_exchange_weak_explicitPU3AS4VU7_AtomiciPU3AS4ii12memory_orderS4_12memory_scope(i32 addrspace(4)* noundef, i32 addrspace(4)* noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr
diff --git a/llvm/test/CodeGen/SPIRV/transcoding/BuildNDRange.ll b/llvm/test/CodeGen/SPIRV/transcoding/BuildNDRange.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/transcoding/BuildNDRange.ll
@@ -0,0 +1,16 @@
+; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
+
+; CHECK-SPIRV-DAG: %[[#]] = OpBuildNDRange %[[#]] %[[#GWS:]] %[[#LWS:]] %[[#GWO:]]
+; CHECK-SPIRV-DAG: %[[#GWS]] = OpConstant %[[#]] 123
+; CHECK-SPIRV-DAG: %[[#LWS]] = OpConstant %[[#]] 456
+; CHECK-SPIRV-DAG: %[[#GWO]] = OpConstant %[[#]] 0
+
+%struct.ndrange_t = type { i32, [3 x i32], [3 x i32], [3 x i32] }
+
+define spir_kernel void @test() {
+  %ndrange = alloca %struct.ndrange_t, align 4
+  call spir_func void @_Z10ndrange_1Djj(%struct.ndrange_t* sret(%struct.ndrange_t) %ndrange, i32 123, i32 456)
+  ret void
+}
+
+declare spir_func void @_Z10ndrange_1Djj(%struct.ndrange_t* sret(%struct.ndrange_t), i32, i32)
diff --git a/llvm/test/CodeGen/SPIRV/transcoding/OpAllAny.ll b/llvm/test/CodeGen/SPIRV/transcoding/OpAllAny.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/transcoding/OpAllAny.ll
@@ -0,0 +1,51 @@
+; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
+
+;; This test checks the SYCL relational builtins any and all with vector input
+;; types.
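+;; A hedged OpenCL C sketch of the calls exercised below (assumed source; the
+;; names follow the IR parameters):
+;;   int r = any(c) + any(s) + any(i) + any(l)
+;;         + all(c) + all(s) + all(i) + all(l);
+;; OpAny/OpAll produce a scalar boolean regardless of the input vector width,
+;; which is why every check below expects the one %[[#BoolTypeID]] result type.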
+ +; CHECK-SPIRV: %[[#BoolTypeID:]] = OpTypeBool + +; CHECK-SPIRV: OpAny %[[#BoolTypeID]] +; CHECK-SPIRV: OpAny %[[#BoolTypeID]] +; CHECK-SPIRV: OpAny %[[#BoolTypeID]] +; CHECK-SPIRV: OpAny %[[#BoolTypeID]] +; CHECK-SPIRV: OpAll %[[#BoolTypeID]] +; CHECK-SPIRV: OpAll %[[#BoolTypeID]] +; CHECK-SPIRV: OpAll %[[#BoolTypeID]] +; CHECK-SPIRV: OpAll %[[#BoolTypeID]] + +define dso_local spir_func void @test_vector(i32 addrspace(4)* nocapture writeonly %out, <2 x i8> %c, <2 x i16> %s, <2 x i32> %i, <2 x i64> %l) local_unnamed_addr { +entry: + %call = tail call spir_func i32 @_Z3anyDv2_c(<2 x i8> %c) + %call1 = tail call spir_func i32 @_Z3anyDv2_s(<2 x i16> %s) + %add = add nsw i32 %call1, %call + %call2 = tail call spir_func i32 @_Z3anyDv2_i(<2 x i32> %i) + %add3 = add nsw i32 %add, %call2 + %call4 = tail call spir_func i32 @_Z3anyDv2_l(<2 x i64> %l) + %add5 = add nsw i32 %add3, %call4 + %call6 = tail call spir_func i32 @_Z3allDv2_c(<2 x i8> %c) + %add7 = add nsw i32 %add5, %call6 + %call8 = tail call spir_func i32 @_Z3allDv2_s(<2 x i16> %s) + %add9 = add nsw i32 %add7, %call8 + %call10 = tail call spir_func i32 @_Z3allDv2_i(<2 x i32> %i) + %add11 = add nsw i32 %add9, %call10 + %call12 = tail call spir_func i32 @_Z3allDv2_l(<2 x i64> %l) + %add13 = add nsw i32 %add11, %call12 + store i32 %add13, i32 addrspace(4)* %out, align 4 + ret void +} + +declare spir_func i32 @_Z3anyDv2_c(<2 x i8>) local_unnamed_addr + +declare spir_func i32 @_Z3anyDv2_s(<2 x i16>) local_unnamed_addr + +declare spir_func i32 @_Z3anyDv2_i(<2 x i32>) local_unnamed_addr + +declare spir_func i32 @_Z3anyDv2_l(<2 x i64>) local_unnamed_addr + +declare spir_func i32 @_Z3allDv2_c(<2 x i8>) local_unnamed_addr + +declare spir_func i32 @_Z3allDv2_s(<2 x i16>) local_unnamed_addr + +declare spir_func i32 @_Z3allDv2_i(<2 x i32>) local_unnamed_addr + +declare spir_func i32 @_Z3allDv2_l(<2 x i64>) local_unnamed_addr diff --git a/llvm/test/CodeGen/SPIRV/transcoding/OpDot.ll b/llvm/test/CodeGen/SPIRV/transcoding/OpDot.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/transcoding/OpDot.ll @@ -0,0 +1,30 @@ +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV + +;; The OpDot operands must be vectors; check that translating dot with +;; scalar arguments does not result in OpDot. +; CHECK-SPIRV-LABEL: %[[#]] = OpFunction %[[#]] None %[[#]] +; CHECK-SPIRV: %[[#]] = OpFMul %[[#]] %[[#]] %[[#]] +; CHECK-SPIRV-NOT: %[[#]] = OpDot %[[#]] %[[#]] %[[#]] +; CHECK-SPIRV: OpFunctionEnd + +define spir_kernel void @testScalar(float %f) { +entry: + %call = tail call spir_func float @_Z3dotff(float %f, float %f) + ret void +} + +;; The OpDot operands must be vectors; check that translating dot with +;; vector arguments results in OpDot. 
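+;; An OpenCL C sketch of both cases (assumed source, for illustration only):
+;;   float s = dot(f, f);   // scalar overload, lowered to the OpFMul above
+;;   float v = dot(v2, v2); // vector overload, lowered to the OpDot below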
+; CHECK-SPIRV-LABEL: %[[#]] = OpFunction %[[#]] None %[[#]]
+; CHECK-SPIRV: %[[#]] = OpDot %[[#]] %[[#]] %[[#]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define spir_kernel void @testVector(<2 x float> %f) {
+entry:
+  %call = tail call spir_func float @_Z3dotDv2_fS_(<2 x float> %f, <2 x float> %f)
+  ret void
+}
+
+declare spir_func float @_Z3dotff(float, float)
+
+declare spir_func float @_Z3dotDv2_fS_(<2 x float>, <2 x float>)
diff --git a/llvm/test/CodeGen/SPIRV/transcoding/OpGroupAllAny.ll b/llvm/test/CodeGen/SPIRV/transcoding/OpGroupAllAny.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/transcoding/OpGroupAllAny.ll
@@ -0,0 +1,18 @@
+; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
+
+; CHECK-SPIRV: OpCapability Groups
+; CHECK-SPIRV: %[[#BoolTypeID:]] = OpTypeBool
+; CHECK-SPIRV: %[[#ConstID:]] = OpConstantTrue %[[#BoolTypeID]]
+; CHECK-SPIRV: %[[#]] = OpGroupAll %[[#BoolTypeID]] %[[#]] %[[#ConstID]]
+; CHECK-SPIRV: %[[#]] = OpGroupAny %[[#BoolTypeID]] %[[#]] %[[#ConstID]]
+
+define spir_kernel void @test(i32 addrspace(1)* nocapture readnone %i) {
+entry:
+  %call = tail call spir_func i32 @_Z14work_group_alli(i32 5)
+  %call1 = tail call spir_func i32 @_Z14work_group_anyi(i32 5)
+  ret void
+}
+
+declare spir_func i32 @_Z14work_group_alli(i32)
+
+declare spir_func i32 @_Z14work_group_anyi(i32)
diff --git a/llvm/test/CodeGen/SPIRV/transcoding/OpenCL/atomic_cmpxchg.ll b/llvm/test/CodeGen/SPIRV/transcoding/OpenCL/atomic_cmpxchg.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/transcoding/OpenCL/atomic_cmpxchg.ll
@@ -0,0 +1,52 @@
+; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
+
+;; This test checks that the backend correctly translates the atomic_cmpxchg
+;; OpenCL C 1.2 built-in function [1] into the corresponding SPIR-V
+;; instruction.
+
+;; __kernel void test_atomic_cmpxchg(__global int *p, int cmp, int val) {
+;;   atomic_cmpxchg(p, cmp, val);
+;;
+;;   __global unsigned int *up = (__global unsigned int *)p;
+;;   unsigned int ucmp = (unsigned int)cmp;
+;;   unsigned int uval = (unsigned int)val;
+;;   atomic_cmpxchg(up, ucmp, uval);
+;; }
+
+; CHECK-SPIRV: OpName %[[#TEST:]] "test_atomic_cmpxchg"
+; CHECK-SPIRV-DAG: %[[#UINT:]] = OpTypeInt 32 0
+; CHECK-SPIRV-DAG: %[[#UINT_PTR:]] = OpTypePointer CrossWorkgroup %[[#UINT]]
+
+;; In SPIR-V, atomic_cmpxchg is represented as OpAtomicCompareExchange [2],
+;; which also takes a memory scope and two memory semantics arguments. The
+;; backend applies a default memory order for it, and therefore the constants
+;; below encode a bit more information than the original source.
+
+;; 0x2 Workgroup
+; CHECK-SPIRV-DAG: %[[#WORKGROUP_SCOPE:]] = OpConstant %[[#UINT]] 2
+
+;; 0x0 Relaxed
+;; TODO: do we need CrossWorkgroupMemory here as well?
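+;; Note on operand order: OpAtomicCompareExchange takes (pointer, scope,
+;; equal semantics, unequal semantics, value, comparator), which is why the
+;; checks below expect %[[#VAL]] before %[[#CMP]].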
+; CHECK-SPIRV-DAG: %[[#RELAXED:]] = OpConstant %[[#UINT]] 0
+
+; CHECK-SPIRV: %[[#TEST]] = OpFunction %[[#]]
+; CHECK-SPIRV: %[[#PTR:]] = OpFunctionParameter %[[#UINT_PTR]]
+; CHECK-SPIRV: %[[#CMP:]] = OpFunctionParameter %[[#UINT]]
+; CHECK-SPIRV: %[[#VAL:]] = OpFunctionParameter %[[#UINT]]
+; CHECK-SPIRV: %[[#]] = OpAtomicCompareExchange %[[#UINT]] %[[#PTR]] %[[#WORKGROUP_SCOPE]] %[[#RELAXED]] %[[#RELAXED]] %[[#VAL]] %[[#CMP]]
+; CHECK-SPIRV: %[[#]] = OpAtomicCompareExchange %[[#UINT]] %[[#PTR]] %[[#WORKGROUP_SCOPE]] %[[#RELAXED]] %[[#RELAXED]] %[[#VAL]] %[[#CMP]]
+
+define dso_local spir_kernel void @test_atomic_cmpxchg(i32 addrspace(1)* noundef %p, i32 noundef %cmp, i32 noundef %val) local_unnamed_addr {
+entry:
+  %call = tail call spir_func i32 @_Z14atomic_cmpxchgPU3AS1Viii(i32 addrspace(1)* noundef %p, i32 noundef %cmp, i32 noundef %val)
+  %call1 = tail call spir_func i32 @_Z14atomic_cmpxchgPU3AS1Vjjj(i32 addrspace(1)* noundef %p, i32 noundef %cmp, i32 noundef %val)
+  ret void
+}
+
+declare spir_func i32 @_Z14atomic_cmpxchgPU3AS1Viii(i32 addrspace(1)* noundef, i32 noundef, i32 noundef) local_unnamed_addr
+
+declare spir_func i32 @_Z14atomic_cmpxchgPU3AS1Vjjj(i32 addrspace(1)* noundef, i32 noundef, i32 noundef) local_unnamed_addr
+
+;; References:
+;; [1]: https://www.khronos.org/registry/OpenCL/sdk/2.0/docs/man/xhtml/atomic_cmpxchg.html
+;; [2]: https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpAtomicCompareExchange
diff --git a/llvm/test/CodeGen/SPIRV/transcoding/OpenCL/atomic_legacy.ll b/llvm/test/CodeGen/SPIRV/transcoding/OpenCL/atomic_legacy.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/transcoding/OpenCL/atomic_legacy.ll
@@ -0,0 +1,46 @@
+; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
+
+;; This test checks that the backend correctly translates the legacy atomic
+;; OpenCL C 1.2 built-in functions [1] into the corresponding SPIR-V
+;; instructions.
+
+;; __kernel void test_legacy_atomics(__global int *p, int val) {
+;;   atom_add(p, val);   // from cl_khr_global_int32_base_atomics
+;;   atomic_add(p, val); // from OpenCL C 1.1
+;; }
+
+; CHECK-SPIRV: OpName %[[#TEST:]] "test_legacy_atomics"
+; CHECK-SPIRV-DAG: %[[#UINT:]] = OpTypeInt 32 0
+; CHECK-SPIRV-DAG: %[[#UINT_PTR:]] = OpTypePointer CrossWorkgroup %[[#UINT]]
+
+;; In SPIR-V, atomic_add is represented as OpAtomicIAdd [2], which also takes
+;; memory scope and memory semantics arguments. The backend applies a default
+;; memory scope and memory order for it, and therefore the constants below
+;; encode a bit more information than the original source.
+
+;; 0x2 Workgroup
+; CHECK-SPIRV-DAG: %[[#WORKGROUP_SCOPE:]] = OpConstant %[[#UINT]] 2
+
+;; 0x0 Relaxed
+; CHECK-SPIRV-DAG: %[[#RELAXED:]] = OpConstant %[[#UINT]] 0
+
+; CHECK-SPIRV: %[[#TEST]] = OpFunction %[[#]]
+; CHECK-SPIRV: %[[#PTR:]] = OpFunctionParameter %[[#UINT_PTR]]
+; CHECK-SPIRV: %[[#VAL:]] = OpFunctionParameter %[[#UINT]]
+; CHECK-SPIRV: %[[#]] = OpAtomicIAdd %[[#UINT]] %[[#PTR]] %[[#WORKGROUP_SCOPE]] %[[#RELAXED]] %[[#VAL]]
+; CHECK-SPIRV: %[[#]] = OpAtomicIAdd %[[#UINT]] %[[#PTR]] %[[#WORKGROUP_SCOPE]] %[[#RELAXED]] %[[#VAL]]
+
+define dso_local spir_kernel void @test_legacy_atomics(i32 addrspace(1)* noundef %p, i32 noundef %val) local_unnamed_addr {
+entry:
+  %call = tail call spir_func i32 @_Z8atom_addPU3AS1Vii(i32 addrspace(1)* noundef %p, i32 noundef %val)
+  %call1 = tail call spir_func i32 @_Z10atomic_addPU3AS1Vii(i32 addrspace(1)* noundef %p, i32 noundef %val)
+  ret void
+}
+
+declare spir_func i32 @_Z8atom_addPU3AS1Vii(i32 addrspace(1)* noundef, i32 noundef) local_unnamed_addr
+
+declare spir_func i32 @_Z10atomic_addPU3AS1Vii(i32 addrspace(1)* noundef, i32 noundef) local_unnamed_addr
+
+;; References:
+;; [1]: https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_C.html#atomic-legacy
+;; [2]: https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpAtomicIAdd
diff --git a/llvm/test/CodeGen/SPIRV/transcoding/OpenCL/atomic_work_item_fence.ll b/llvm/test/CodeGen/SPIRV/transcoding/OpenCL/atomic_work_item_fence.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/transcoding/OpenCL/atomic_work_item_fence.ll
@@ -0,0 +1,77 @@
+; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
+
+;; This test checks that the backend correctly translates the
+;; atomic_work_item_fence OpenCL C 2.0 built-in function [1] into the
+;; corresponding SPIR-V instruction [2].
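+;;
+;; The memory-semantics constants checked in this file combine a SPIR-V
+;; memory-order bit with storage-class bits. A minimal C sketch of that
+;; arithmetic (enum values are from the SPIR-V spec; the names are ours):
+;;   enum { Relaxed = 0x0, Acquire = 0x2, Release = 0x4, AcqRel = 0x8,
+;;          SeqCst = 0x10, WorkgroupMem = 0x100, CrossWorkgroupMem = 0x200,
+;;          ImageMem = 0x800 };
+;;   unsigned local_relaxed  = Relaxed | WorkgroupMem;      /* 256  */
+;;   unsigned global_acquire = Acquire | CrossWorkgroupMem; /* 514  */
+;;   unsigned image_release  = Release | ImageMem;          /* 2052 */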
+ +;; __kernel void test_mem_fence_const_flags() { +;; atomic_work_item_fence(CLK_LOCAL_MEM_FENCE, memory_order_relaxed, memory_scope_work_item); +;; atomic_work_item_fence(CLK_GLOBAL_MEM_FENCE, memory_order_acquire, memory_scope_work_group); +;; atomic_work_item_fence(CLK_IMAGE_MEM_FENCE, memory_order_release, memory_scope_device); +;; atomic_work_item_fence(CLK_LOCAL_MEM_FENCE, memory_order_acq_rel, memory_scope_all_svm_devices); +;; atomic_work_item_fence(CLK_GLOBAL_MEM_FENCE, memory_order_seq_cst, memory_scope_sub_group); +;; atomic_work_item_fence(CLK_IMAGE_MEM_FENCE | CLK_LOCAL_MEM_FENCE, memory_order_acquire, memory_scope_sub_group); +;; } + +;; __kernel void test_mem_fence_non_const_flags(cl_mem_fence_flags flags, memory_order order, memory_scope scope) { +;; // FIXME: OpenCL spec doesn't require flags to be compile-time known +;; // atomic_work_item_fence(flags, order, scope); +;; } + +; CHECK-SPIRV: OpName %[[#TEST_CONST_FLAGS:]] "test_mem_fence_const_flags" +; CHECK-SPIRV: %[[#UINT:]] = OpTypeInt 32 0 + +;; 0x0 Relaxed + 0x100 WorkgroupMemory +; CHECK-SPIRV-DAG: %[[#LOCAL_RELAXED:]] = OpConstant %[[#UINT]] 256 +;; 0x2 Acquire + 0x200 CrossWorkgroupMemory +; CHECK-SPIRV-DAG: %[[#GLOBAL_ACQUIRE:]] = OpConstant %[[#UINT]] 514 +;; 0x4 Release + 0x800 ImageMemory +; CHECK-SPIRV-DAG: %[[#IMAGE_RELEASE:]] = OpConstant %[[#UINT]] 2052 +;; 0x8 AcquireRelease + 0x100 WorkgroupMemory +; CHECK-SPIRV-DAG: %[[#LOCAL_ACQ_REL:]] = OpConstant %[[#UINT]] 264 +;; 0x10 SequentiallyConsistent + 0x200 CrossWorkgroupMemory +; CHECK-SPIRV-DAG: %[[#GLOBAL_SEQ_CST:]] = OpConstant %[[#UINT]] 528 +;; 0x2 Acquire + 0x100 WorkgroupMemory + 0x800 ImageMemory +; CHECK-SPIRV-DAG: %[[#LOCAL_IMAGE_ACQUIRE:]] = OpConstant %[[#UINT]] 2306 + +;; Scopes [4]: +;; 4 Invocation +; CHECK-SPIRV-DAG: %[[#SCOPE_INVOCATION:]] = OpConstant %[[#UINT]] 4 +;; 2 Workgroup +; CHECK-SPIRV-DAG: %[[#SCOPE_WORK_GROUP:]] = OpConstant %[[#UINT]] 2 +;; 1 Device +; CHECK-SPIRV-DAG: %[[#SCOPE_DEVICE:]] = OpConstant %[[#UINT]] 1 +;; 0 CrossDevice +; CHECK-SPIRV-DAG: %[[#SCOPE_CROSS_DEVICE:]] = OpConstant %[[#UINT]] 0 +;; 3 Subgroup +; CHECK-SPIRV-DAG: %[[#SCOPE_SUBGROUP:]] = OpConstant %[[#UINT]] 3 + +; CHECK-SPIRV: %[[#TEST_CONST_FLAGS]] = OpFunction %[[#]] +; CHECK-SPIRV: OpMemoryBarrier %[[#SCOPE_INVOCATION]] %[[#LOCAL_RELAXED]] +; CHECK-SPIRV: OpMemoryBarrier %[[#SCOPE_WORK_GROUP]] %[[#GLOBAL_ACQUIRE]] +; CHECK-SPIRV: OpMemoryBarrier %[[#SCOPE_DEVICE]] %[[#IMAGE_RELEASE]] +; CHECK-SPIRV: OpMemoryBarrier %[[#SCOPE_CROSS_DEVICE]] %[[#LOCAL_ACQ_REL]] +; CHECK-SPIRV: OpMemoryBarrier %[[#SCOPE_SUBGROUP]] %[[#GLOBAL_SEQ_CST]] +; CHECK-SPIRV: OpMemoryBarrier %[[#SCOPE_SUBGROUP]] %[[#LOCAL_IMAGE_ACQUIRE]] + +define dso_local spir_kernel void @test_mem_fence_const_flags() local_unnamed_addr { +entry: + tail call spir_func void @_Z22atomic_work_item_fencej12memory_order12memory_scope(i32 noundef 1, i32 noundef 0, i32 noundef 0) + tail call spir_func void @_Z22atomic_work_item_fencej12memory_order12memory_scope(i32 noundef 2, i32 noundef 2, i32 noundef 1) + tail call spir_func void @_Z22atomic_work_item_fencej12memory_order12memory_scope(i32 noundef 4, i32 noundef 3, i32 noundef 2) + tail call spir_func void @_Z22atomic_work_item_fencej12memory_order12memory_scope(i32 noundef 1, i32 noundef 4, i32 noundef 3) + tail call spir_func void @_Z22atomic_work_item_fencej12memory_order12memory_scope(i32 noundef 2, i32 noundef 5, i32 noundef 4) + tail call spir_func void @_Z22atomic_work_item_fencej12memory_order12memory_scope(i32 noundef 5, i32 noundef 2, 
i32 noundef 4)
+  ret void
+}
+
+declare spir_func void @_Z22atomic_work_item_fencej12memory_order12memory_scope(i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr
+
+define dso_local spir_kernel void @test_mem_fence_non_const_flags(i32 noundef %flags, i32 noundef %order, i32 noundef %scope) local_unnamed_addr {
+entry:
+  ret void
+}
+
+;; References:
+;; [1]: https://www.khronos.org/registry/OpenCL/sdk/2.0/docs/man/xhtml/atomic_work_item_fence.html
+;; [2]: https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpMemoryBarrier
diff --git a/llvm/test/CodeGen/SPIRV/transcoding/OpenCL/barrier.ll b/llvm/test/CodeGen/SPIRV/transcoding/OpenCL/barrier.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/transcoding/OpenCL/barrier.ll
@@ -0,0 +1,80 @@
+; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
+
+;; This test checks that the backend correctly translates the barrier
+;; OpenCL C 1.2 built-in function [1] into the corresponding SPIR-V
+;; instruction.
+
+;; FIXME: Strictly speaking, this flag is not supported by barrier in OpenCL 1.2
+;; #define CLK_IMAGE_MEM_FENCE 0x04
+;;
+;; void __attribute__((overloadable)) __attribute__((convergent)) barrier(cl_mem_fence_flags);
+;;
+;; __kernel void test_barrier_const_flags() {
+;;   barrier(CLK_LOCAL_MEM_FENCE);
+;;   barrier(CLK_GLOBAL_MEM_FENCE);
+;;   barrier(CLK_IMAGE_MEM_FENCE);
+;;
+;;   barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
+;;   barrier(CLK_LOCAL_MEM_FENCE | CLK_IMAGE_MEM_FENCE);
+;;   barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE | CLK_IMAGE_MEM_FENCE);
+;; }
+;;
+;; __kernel void test_barrier_non_const_flags(cl_mem_fence_flags flags) {
+  ;; FIXME: OpenCL spec doesn't require flags to be compile-time known
+  ;; barrier(flags);
+;; }
+
+; CHECK-SPIRV: OpName %[[#TEST_CONST_FLAGS:]] "test_barrier_const_flags"
+; CHECK-SPIRV: %[[#UINT:]] = OpTypeInt 32 0
+
+;; In SPIR-V, barrier is represented as OpControlBarrier [3], and the OpenCL
+;; cl_mem_fence_flags are represented as part of Memory Semantics [2], which
+;; also includes memory order constraints. The backend applies a default
+;; memory order for OpControlBarrier, and therefore the constants below
+;; encode a bit more information than the original source.
+
+;; 0x10 SequentiallyConsistent + 0x100 WorkgroupMemory
+; CHECK-SPIRV: %[[#LOCAL:]] = OpConstant %[[#UINT]] 272
+;; 0x2 Workgroup
+; CHECK-SPIRV: %[[#WG:]] = OpConstant %[[#UINT]] 2
+;; 0x10 SequentiallyConsistent + 0x200 CrossWorkgroupMemory
+; CHECK-SPIRV-DAG: %[[#GLOBAL:]] = OpConstant %[[#UINT]] 528
+;; 0x10 SequentiallyConsistent + 0x800 ImageMemory
+; CHECK-SPIRV-DAG: %[[#IMAGE:]] = OpConstant %[[#UINT]] 2064
+;; 0x10 SequentiallyConsistent + 0x100 WorkgroupMemory + 0x200 CrossWorkgroupMemory
+; CHECK-SPIRV-DAG: %[[#LOCAL_GLOBAL:]] = OpConstant %[[#UINT]] 784
+;; 0x10 SequentiallyConsistent + 0x100 WorkgroupMemory + 0x800 ImageMemory
+; CHECK-SPIRV-DAG: %[[#LOCAL_IMAGE:]] = OpConstant %[[#UINT]] 2320
+;; 0x10 SequentiallyConsistent + 0x100 WorkgroupMemory + 0x200 CrossWorkgroupMemory + 0x800 ImageMemory
+; CHECK-SPIRV-DAG: %[[#LOCAL_GLOBAL_IMAGE:]] = OpConstant %[[#UINT]] 2832
+
+; CHECK-SPIRV: %[[#TEST_CONST_FLAGS]] = OpFunction %[[#]]
+; CHECK-SPIRV: OpControlBarrier %[[#WG]] %[[#WG]] %[[#LOCAL]]
+; CHECK-SPIRV: OpControlBarrier %[[#WG]] %[[#WG]] %[[#GLOBAL]]
+; CHECK-SPIRV: OpControlBarrier %[[#WG]] %[[#WG]] %[[#IMAGE]]
+; CHECK-SPIRV: OpControlBarrier %[[#WG]] %[[#WG]] %[[#LOCAL_GLOBAL]]
+; CHECK-SPIRV: OpControlBarrier %[[#WG]] %[[#WG]] %[[#LOCAL_IMAGE]]
+; CHECK-SPIRV: OpControlBarrier %[[#WG]] %[[#WG]] %[[#LOCAL_GLOBAL_IMAGE]]
+
+define dso_local spir_kernel void @test_barrier_const_flags() local_unnamed_addr {
+entry:
+  tail call spir_func void @_Z7barrierj(i32 noundef 1)
+  tail call spir_func void @_Z7barrierj(i32 noundef 2)
+  tail call spir_func void @_Z7barrierj(i32 noundef 4)
+  tail call spir_func void @_Z7barrierj(i32 noundef 3)
+  tail call spir_func void @_Z7barrierj(i32 noundef 5)
+  tail call spir_func void @_Z7barrierj(i32 noundef 7)
+  ret void
+}
+
+declare spir_func void @_Z7barrierj(i32 noundef) local_unnamed_addr
+
+define dso_local spir_kernel void @test_barrier_non_const_flags(i32 noundef %flags) local_unnamed_addr {
+entry:
+  ret void
+}
+
+;; References:
+;; [1]: https://www.khronos.org/registry/OpenCL/sdk/1.2/docs/man/xhtml/barrier.html
+;; [2]: https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#_a_id_memory_semantics__id_a_memory_semantics_lt_id_gt
+;; [3]: https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpControlBarrier
diff --git a/llvm/test/CodeGen/SPIRV/transcoding/OpenCL/sub_group_mask.ll b/llvm/test/CodeGen/SPIRV/transcoding/OpenCL/sub_group_mask.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/transcoding/OpenCL/sub_group_mask.ll
@@ -0,0 +1,18 @@
+; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
+
+; CHECK-SPIRV: OpCapability GroupNonUniformBallot
+; CHECK-SPIRV: OpDecorate %[[#]] BuiltIn SubgroupGtMask
+
+;; kernel void test_mask(global uint4 *out)
+;; {
+;;   *out = get_sub_group_gt_mask();
+;; }
+
+define dso_local spir_kernel void @test_mask(<4 x i32> addrspace(1)* nocapture noundef writeonly %out) local_unnamed_addr {
+entry:
+  %call = tail call spir_func <4 x i32> @_Z21get_sub_group_gt_maskv()
+  store <4 x i32> %call, <4 x i32> addrspace(1)* %out, align 16
+  ret void
+}
+
+declare spir_func <4 x i32> @_Z21get_sub_group_gt_maskv() local_unnamed_addr
diff --git a/llvm/test/CodeGen/SPIRV/transcoding/OpenCL/work_group_barrier.ll b/llvm/test/CodeGen/SPIRV/transcoding/OpenCL/work_group_barrier.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/transcoding/OpenCL/work_group_barrier.ll
@@ -0,0 +1,111 @@
+; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
+
+;; This test checks that the backend correctly translates the
+;; work_group_barrier built-in function [1] into the corresponding SPIR-V
+;; instruction.
+
+;; __kernel void test_barrier_const_flags() {
+;;   work_group_barrier(CLK_LOCAL_MEM_FENCE);
+;;   work_group_barrier(CLK_GLOBAL_MEM_FENCE);
+;;   work_group_barrier(CLK_IMAGE_MEM_FENCE);
+;;
+;;   work_group_barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
+;;   work_group_barrier(CLK_LOCAL_MEM_FENCE | CLK_IMAGE_MEM_FENCE);
+;;   work_group_barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE | CLK_IMAGE_MEM_FENCE);
+;;
+;;   work_group_barrier(CLK_LOCAL_MEM_FENCE, memory_scope_work_item);
+;;   work_group_barrier(CLK_LOCAL_MEM_FENCE, memory_scope_work_group);
+;;   work_group_barrier(CLK_LOCAL_MEM_FENCE, memory_scope_device);
+;;   work_group_barrier(CLK_LOCAL_MEM_FENCE, memory_scope_all_svm_devices);
+;;   work_group_barrier(CLK_LOCAL_MEM_FENCE, memory_scope_sub_group);
+;;
+  ;; barrier should also work (preserved for backward compatibility)
+;;   barrier(CLK_GLOBAL_MEM_FENCE);
+;; }
+;;
+;; __kernel void test_barrier_non_const_flags(cl_mem_fence_flags flags, memory_scope scope) {
+  ;; FIXME: OpenCL spec doesn't require flags to be compile-time known
+  ;; work_group_barrier(flags);
+  ;; work_group_barrier(flags, scope);
+;; }
+
+; CHECK-SPIRV: OpName %[[#TEST_CONST_FLAGS:]] "test_barrier_const_flags"
+; CHECK-SPIRV: %[[#UINT:]] = OpTypeInt 32 0
+
+;; In SPIR-V, barrier is represented as OpControlBarrier [2], and the OpenCL
+;; cl_mem_fence_flags are represented as part of Memory Semantics [3], which
+;; also includes memory order constraints. The backend applies a default
+;; memory order for OpControlBarrier, and therefore the constants below
+;; encode a bit more information than the original source.
+
+;; 0x10 SequentiallyConsistent + 0x100 WorkgroupMemory
+; CHECK-SPIRV-DAG: %[[#LOCAL:]] = OpConstant %[[#UINT]] 272
+;; 0x10 SequentiallyConsistent + 0x200 CrossWorkgroupMemory
+; CHECK-SPIRV-DAG: %[[#GLOBAL:]] = OpConstant %[[#UINT]] 528
+;; 0x10 SequentiallyConsistent + 0x800 ImageMemory
+; CHECK-SPIRV-DAG: %[[#IMAGE:]] = OpConstant %[[#UINT]] 2064
+;; 0x10 SequentiallyConsistent + 0x100 WorkgroupMemory + 0x200 CrossWorkgroupMemory
+; CHECK-SPIRV-DAG: %[[#LOCAL_GLOBAL:]] = OpConstant %[[#UINT]] 784
+;; 0x10 SequentiallyConsistent + 0x100 WorkgroupMemory + 0x800 ImageMemory
+; CHECK-SPIRV-DAG: %[[#LOCAL_IMAGE:]] = OpConstant %[[#UINT]] 2320
+;; 0x10 SequentiallyConsistent + 0x100 WorkgroupMemory + 0x200 CrossWorkgroupMemory + 0x800 ImageMemory
+; CHECK-SPIRV-DAG: %[[#LOCAL_GLOBAL_IMAGE:]] = OpConstant %[[#UINT]] 2832
+
+;; Scopes [4]:
+;; 2 Workgroup
+; CHECK-SPIRV-DAG: %[[#SCOPE_WORK_GROUP:]] = OpConstant %[[#UINT]] 2
+;; 4 Invocation
+; CHECK-SPIRV-DAG: %[[#SCOPE_INVOCATION:]] = OpConstant %[[#UINT]] 4
+;; 1 Device
+; CHECK-SPIRV-DAG: %[[#SCOPE_DEVICE:]] = OpConstant %[[#UINT]] 1
+;; 0 CrossDevice
+; CHECK-SPIRV-DAG: %[[#SCOPE_CROSS_DEVICE:]] = OpConstant %[[#UINT]] 0
+;; 3 Subgroup
+; CHECK-SPIRV-DAG: %[[#SCOPE_SUBGROUP:]] = OpConstant %[[#UINT]] 3
+
+; CHECK-SPIRV: %[[#TEST_CONST_FLAGS]] = OpFunction %[[#]]
+; CHECK-SPIRV: OpControlBarrier %[[#SCOPE_WORK_GROUP]] %[[#SCOPE_WORK_GROUP]] %[[#LOCAL]]
+; CHECK-SPIRV: OpControlBarrier %[[#SCOPE_WORK_GROUP]] %[[#SCOPE_WORK_GROUP]] %[[#GLOBAL]]
+; CHECK-SPIRV: OpControlBarrier %[[#SCOPE_WORK_GROUP]] %[[#SCOPE_WORK_GROUP]] %[[#IMAGE]]
+; CHECK-SPIRV: OpControlBarrier %[[#SCOPE_WORK_GROUP]] %[[#SCOPE_WORK_GROUP]] %[[#LOCAL_GLOBAL]]
+; CHECK-SPIRV: OpControlBarrier %[[#SCOPE_WORK_GROUP]] %[[#SCOPE_WORK_GROUP]] %[[#LOCAL_IMAGE]]
+; CHECK-SPIRV: OpControlBarrier %[[#SCOPE_WORK_GROUP]] %[[#SCOPE_WORK_GROUP]] %[[#LOCAL_GLOBAL_IMAGE]]
+; CHECK-SPIRV: OpControlBarrier %[[#SCOPE_WORK_GROUP]] %[[#SCOPE_INVOCATION]] %[[#LOCAL]]
+; CHECK-SPIRV: OpControlBarrier %[[#SCOPE_WORK_GROUP]] %[[#SCOPE_WORK_GROUP]] %[[#LOCAL]]
+; CHECK-SPIRV: OpControlBarrier %[[#SCOPE_WORK_GROUP]] %[[#SCOPE_DEVICE]] %[[#LOCAL]]
+; CHECK-SPIRV: OpControlBarrier %[[#SCOPE_WORK_GROUP]] %[[#SCOPE_CROSS_DEVICE]] %[[#LOCAL]]
+; CHECK-SPIRV: OpControlBarrier %[[#SCOPE_WORK_GROUP]] %[[#SCOPE_SUBGROUP]] %[[#LOCAL]]
+; CHECK-SPIRV: OpControlBarrier %[[#SCOPE_WORK_GROUP]] %[[#SCOPE_WORK_GROUP]] %[[#GLOBAL]]
+
+define dso_local spir_kernel void @test_barrier_const_flags() local_unnamed_addr {
+entry:
+  tail call spir_func void @_Z18work_group_barrierj(i32 noundef 1)
+  tail call spir_func void @_Z18work_group_barrierj(i32 noundef 2)
+  tail call spir_func void @_Z18work_group_barrierj(i32 noundef 4)
+  tail call spir_func void @_Z18work_group_barrierj(i32 noundef 3)
+  tail call spir_func void @_Z18work_group_barrierj(i32 noundef 5)
+  tail call spir_func void @_Z18work_group_barrierj(i32 noundef 7)
+  tail call spir_func void @_Z18work_group_barrierj12memory_scope(i32 noundef 1, i32 noundef 0)
+  tail call spir_func void @_Z18work_group_barrierj12memory_scope(i32 noundef 1, i32 noundef 1)
+  tail call spir_func void @_Z18work_group_barrierj12memory_scope(i32 noundef 1, i32 noundef 2)
+  tail call spir_func void @_Z18work_group_barrierj12memory_scope(i32 noundef 1, i32 noundef 3)
+  tail call spir_func void
@_Z18work_group_barrierj12memory_scope(i32 noundef 1, i32 noundef 4) + tail call spir_func void @_Z7barrierj(i32 noundef 2) + ret void +} + +declare spir_func void @_Z18work_group_barrierj(i32 noundef) local_unnamed_addr + +declare spir_func void @_Z18work_group_barrierj12memory_scope(i32 noundef, i32 noundef) local_unnamed_addr + +declare spir_func void @_Z7barrierj(i32 noundef) local_unnamed_addr + +define dso_local spir_kernel void @test_barrier_non_const_flags(i32 noundef %flags, i32 noundef %scope) local_unnamed_addr { +entry: + ret void +} + +;; References: +;; [1]: https://www.khronos.org/registry/OpenCL/sdk/2.0/docs/man/xhtml/work_group_barrier.html +;; [2]: https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpControlBarrier +;; [3]: https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#_a_id_memory_semantics__id_a_memory_semantics_lt_id_gt diff --git a/llvm/test/CodeGen/SPIRV/transcoding/atomic_load_store.ll b/llvm/test/CodeGen/SPIRV/transcoding/atomic_load_store.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/transcoding/atomic_load_store.ll @@ -0,0 +1,32 @@ +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV + +;; Check 'LLVM ==> SPIR-V' conversion of atomic_load and atomic_store. + +; CHECK-SPIRV-LABEL: OpFunction +; CHECK-SPIRV-NEXT: %[[#object:]] = OpFunctionParameter %[[#]] +; CHECK-SPIRV: %[[#ret:]] = OpAtomicLoad %[[#]] %[[#object]] %[[#]] %[[#]] +; CHECK-SPIRV: OpReturnValue %[[#ret]] +; CHECK-SPIRV-LABEL: OpFunctionEnd + +define spir_func i32 @test_load(i32 addrspace(4)* %object) { +entry: + %0 = call spir_func i32 @_Z11atomic_loadPVU3AS4U7_Atomici(i32 addrspace(4)* %object) + ret i32 %0 +} + +; CHECK-SPIRV-LABEL: OpFunction +; CHECK-SPIRV-NEXT: %[[#object:]] = OpFunctionParameter %[[#]] +; CHECK-SPIRV-NEXT: OpFunctionParameter +; CHECK-SPIRV-NEXT: %[[#desired:]] = OpFunctionParameter %[[#]] +; CHECK-SPIRV: OpAtomicStore %[[#object]] %[[#]] %[[#]] %[[#desired]] +; CHECK-SPIRV-LABEL: OpFunctionEnd + +define spir_func void @test_store(i32 addrspace(4)* %object, i32 addrspace(4)* %expected, i32 %desired) { +entry: + call spir_func void @_Z12atomic_storePVU3AS4U7_Atomicii(i32 addrspace(4)* %object, i32 %desired) + ret void +} + +declare spir_func i32 @_Z11atomic_loadPVU3AS4U7_Atomici(i32 addrspace(4)*) + +declare spir_func void @_Z12atomic_storePVU3AS4U7_Atomicii(i32 addrspace(4)*, i32) diff --git a/llvm/test/CodeGen/SPIRV/transcoding/builtin_calls.ll b/llvm/test/CodeGen/SPIRV/transcoding/builtin_calls.ll --- a/llvm/test/CodeGen/SPIRV/transcoding/builtin_calls.ll +++ b/llvm/test/CodeGen/SPIRV/transcoding/builtin_calls.ll @@ -1,11 +1,11 @@ ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV -; CHECK-SPIRV-DAG: OpDecorate %[[Id:[0-9]+]] BuiltIn GlobalInvocationId -; CHECK-SPIRV-DAG: OpDecorate %[[Id:[0-9]+]] BuiltIn GlobalLinearId -; CHECK-SPIRV: %[[Id:[0-9]+]] = OpVariable %{{[0-9]+}} -; CHECK-SPIRV: %[[Id:[0-9]+]] = OpVariable %{{[0-9]+}} +; CHECK-SPIRV-DAG: OpDecorate %[[#Id:]] BuiltIn GlobalInvocationId +; CHECK-SPIRV-DAG: OpDecorate %[[#Id:]] BuiltIn GlobalLinearId +; CHECK-SPIRV: %[[#Id:]] = OpVariable %[[#]] +; CHECK-SPIRV: %[[#Id:]] = OpVariable %[[#]] -define spir_kernel void @f(){ +define spir_kernel void @f() { entry: %0 = call spir_func i32 @_Z29__spirv_BuiltInGlobalLinearIdv() %1 = call spir_func i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32 1) diff --git a/llvm/test/CodeGen/SPIRV/transcoding/fclamp.ll 
b/llvm/test/CodeGen/SPIRV/transcoding/fclamp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/transcoding/fclamp.ll @@ -0,0 +1,18 @@ +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV + +; CHECK-SPIRV: %[[#]] = OpExtInst %[[#]] %[[#]] fclamp +; CHECK-SPIRV-NOT: %[[#]] = OpExtInst %[[#]] %[[#]] clamp + +define spir_kernel void @test_scalar(float addrspace(1)* nocapture readonly %f) { +entry: + %0 = load float, float addrspace(1)* %f, align 4 + %call = tail call spir_func float @_Z5clampfff(float %0, float 0.000000e+00, float 1.000000e+00) + %1 = load float, float addrspace(1)* %f, align 4 + %conv = fptrunc float %1 to half + %call1 = tail call spir_func half @_Z5clampDhDhDh(half %conv, half %conv, half %conv) + ret void +} + +declare spir_func float @_Z5clampfff(float, float, float) + +declare spir_func half @_Z5clampDhDhDh(half, half, half) diff --git a/llvm/test/CodeGen/SPIRV/transcoding/fmod.ll b/llvm/test/CodeGen/SPIRV/transcoding/fmod.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/transcoding/fmod.ll @@ -0,0 +1,14 @@ +;; __kernel void fmod_kernel( float out, float in1, float in2 ) +;; { out = fmod( in1, in2 ); } + +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV + +; CHECK-SPIRV: %[[#]] = OpExtInst %[[#]] %[[#]] fmod %[[#]] %[[#]] + +define spir_kernel void @fmod_kernel(float %out, float %in1, float %in2) { +entry: + %call = call spir_func float @_Z4fmodff(float %in1, float %in2) + ret void +} + +declare spir_func float @_Z4fmodff(float, float) diff --git a/llvm/test/CodeGen/SPIRV/transcoding/group_ops.ll b/llvm/test/CodeGen/SPIRV/transcoding/group_ops.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/transcoding/group_ops.ll @@ -0,0 +1,264 @@ +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV + +; CHECK-SPIRV-DAG: %[[#int:]] = OpTypeInt 32 0 +; CHECK-SPIRV-DAG: %[[#float:]] = OpTypeFloat 32 +; CHECK-SPIRV-DAG: %[[#ScopeWorkgroup:]] = OpConstant %[[#int]] 2 +; CHECK-SPIRV-DAG: %[[#ScopeSubgroup:]] = OpConstant %[[#int]] 3 + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupFMax %[[#float]] %[[#ScopeWorkgroup]] Reduce +; CHECK-SPIRV: OpFunctionEnd + +;; kernel void testWorkGroupFMax(float a, global float *res) { +;; res[0] = work_group_reduce_max(a); +;; } + +define dso_local spir_kernel void @testWorkGroupFMax(float noundef %a, float addrspace(1)* nocapture noundef writeonly %res) local_unnamed_addr { +entry: + %call = call spir_func float @_Z21work_group_reduce_maxf(float noundef %a) + store float %call, float addrspace(1)* %res, align 4 + ret void +} + +declare spir_func float @_Z21work_group_reduce_maxf(float noundef) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupFMin %[[#float]] %[[#ScopeWorkgroup]] Reduce +; CHECK-SPIRV: OpFunctionEnd + +;; kernel void testWorkGroupFMin(float a, global float *res) { +;; res[0] = work_group_reduce_min(a); +;; } + +define dso_local spir_kernel void @testWorkGroupFMin(float noundef %a, float addrspace(1)* nocapture noundef writeonly %res) local_unnamed_addr { +entry: + %call = call spir_func float @_Z21work_group_reduce_minf(float noundef %a) + store float %call, float addrspace(1)* %res, align 4 + ret void +} + +declare spir_func float @_Z21work_group_reduce_minf(float noundef) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupFAdd %[[#float]] %[[#ScopeWorkgroup]] Reduce +; 
CHECK-SPIRV: OpFunctionEnd + +;; kernel void testWorkGroupFAdd(float a, global float *res) { +;; res[0] = work_group_reduce_add(a); +;; } + +define dso_local spir_kernel void @testWorkGroupFAdd(float noundef %a, float addrspace(1)* nocapture noundef writeonly %res) local_unnamed_addr { +entry: + %call = call spir_func float @_Z21work_group_reduce_addf(float noundef %a) + store float %call, float addrspace(1)* %res, align 4 + ret void +} + +declare spir_func float @_Z21work_group_reduce_addf(float noundef) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupFMax %[[#float]] %[[#ScopeWorkgroup]] InclusiveScan +; CHECK-SPIRV: OpFunctionEnd + +;; kernel void testWorkGroupScanInclusiveFMax(float a, global float *res) { +;; res[0] = work_group_scan_inclusive_max(a); +;; } + +define dso_local spir_kernel void @testWorkGroupScanInclusiveFMax(float noundef %a, float addrspace(1)* nocapture noundef writeonly %res) local_unnamed_addr { +entry: + %call = call spir_func float @_Z29work_group_scan_inclusive_maxf(float noundef %a) + store float %call, float addrspace(1)* %res, align 4 + ret void +} + +declare spir_func float @_Z29work_group_scan_inclusive_maxf(float noundef) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupFMax %[[#float]] %[[#ScopeWorkgroup]] ExclusiveScan +; CHECK-SPIRV: OpFunctionEnd + +;; kernel void testWorkGroupScanExclusiveFMax(float a, global float *res) { +;; res[0] = work_group_scan_exclusive_max(a); +;; } + +define dso_local spir_kernel void @testWorkGroupScanExclusiveFMax(float noundef %a, float addrspace(1)* nocapture noundef writeonly %res) local_unnamed_addr { +entry: + %call = call spir_func float @_Z29work_group_scan_exclusive_maxf(float noundef %a) + store float %call, float addrspace(1)* %res, align 4 + ret void +} + +declare spir_func float @_Z29work_group_scan_exclusive_maxf(float noundef) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupSMax %[[#int]] %[[#ScopeWorkgroup]] Reduce +; CHECK-SPIRV: OpFunctionEnd + +;; kernel void testWorkGroupSMax(int a, global int *res) { +;; res[0] = work_group_reduce_max(a); +;; } + +define dso_local spir_kernel void @testWorkGroupSMax(i32 noundef %a, i32 addrspace(1)* nocapture noundef writeonly %res) local_unnamed_addr { +entry: + %call = call spir_func i32 @_Z21work_group_reduce_maxi(i32 noundef %a) + store i32 %call, i32 addrspace(1)* %res, align 4 + ret void +} + +declare spir_func i32 @_Z21work_group_reduce_maxi(i32 noundef) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupSMin %[[#int]] %[[#ScopeWorkgroup]] Reduce +; CHECK-SPIRV: OpFunctionEnd + +;; kernel void testWorkGroupSMin(int a, global int *res) { +;; res[0] = work_group_reduce_min(a); +;; } + +define dso_local spir_kernel void @testWorkGroupSMin(i32 noundef %a, i32 addrspace(1)* nocapture noundef writeonly %res) local_unnamed_addr { +entry: + %call = call spir_func i32 @_Z21work_group_reduce_mini(i32 noundef %a) + store i32 %call, i32 addrspace(1)* %res, align 4 + ret void +} + +declare spir_func i32 @_Z21work_group_reduce_mini(i32 noundef) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupIAdd %[[#int]] %[[#ScopeWorkgroup]] Reduce +; CHECK-SPIRV: OpFunctionEnd + +;; kernel void testWorkGroupIAddSigned(int a, global int *res) { +;; res[0] = work_group_reduce_add(a); +;; } + +define dso_local spir_kernel void @testWorkGroupIAddSigned(i32 noundef %a, i32 addrspace(1)* nocapture noundef writeonly %res) local_unnamed_addr { 
+entry: + %call = call spir_func i32 @_Z21work_group_reduce_addi(i32 noundef %a) + store i32 %call, i32 addrspace(1)* %res, align 4 + ret void +} + +declare spir_func i32 @_Z21work_group_reduce_addi(i32 noundef) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupIAdd %[[#int]] %[[#ScopeWorkgroup]] Reduce +; CHECK-SPIRV: OpFunctionEnd + +;; kernel void testWorkGroupIAddUnsigned(uint a, global uint *res) { +;; res[0] = work_group_reduce_add(a); +;; } + +define dso_local spir_kernel void @testWorkGroupIAddUnsigned(i32 noundef %a, i32 addrspace(1)* nocapture noundef writeonly %res) local_unnamed_addr { +entry: + %call = call spir_func i32 @_Z21work_group_reduce_addj(i32 noundef %a) + store i32 %call, i32 addrspace(1)* %res, align 4 + ret void +} + +declare spir_func i32 @_Z21work_group_reduce_addj(i32 noundef) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupUMax %[[#int]] %[[#ScopeWorkgroup]] Reduce +; CHECK-SPIRV: OpFunctionEnd + +;; kernel void testWorkGroupUMax(uint a, global uint *res) { +;; res[0] = work_group_reduce_max(a); +;; } + +define dso_local spir_kernel void @testWorkGroupUMax(i32 noundef %a, i32 addrspace(1)* nocapture noundef writeonly %res) local_unnamed_addr { +entry: + %call = call spir_func i32 @_Z21work_group_reduce_maxj(i32 noundef %a) + store i32 %call, i32 addrspace(1)* %res, align 4 + ret void +} + +declare spir_func i32 @_Z21work_group_reduce_maxj(i32 noundef) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupUMax %[[#int]] %[[#ScopeSubgroup]] Reduce +; CHECK-SPIRV: OpFunctionEnd + +;; #pragma OPENCL EXTENSION cl_khr_subgroups: enable +;; kernel void testSubGroupUMax(uint a, global uint *res) { +;; res[0] = sub_group_reduce_max(a); +;; } +;; #pragma OPENCL EXTENSION cl_khr_subgroups: disable + +define dso_local spir_kernel void @testSubGroupUMax(i32 noundef %a, i32 addrspace(1)* nocapture noundef writeonly %res) local_unnamed_addr { +entry: + %call = call spir_func i32 @_Z20sub_group_reduce_maxj(i32 noundef %a) + store i32 %call, i32 addrspace(1)* %res, align 4 + ret void +} + +declare spir_func i32 @_Z20sub_group_reduce_maxj(i32 noundef) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupUMax %[[#int]] %[[#ScopeWorkgroup]] InclusiveScan +; CHECK-SPIRV: OpFunctionEnd + +;; kernel void testWorkGroupScanInclusiveUMax(uint a, global uint *res) { +;; res[0] = work_group_scan_inclusive_max(a); +;; } + +define dso_local spir_kernel void @testWorkGroupScanInclusiveUMax(i32 noundef %a, i32 addrspace(1)* nocapture noundef writeonly %res) local_unnamed_addr { +entry: + %call = call spir_func i32 @_Z29work_group_scan_inclusive_maxj(i32 noundef %a) + store i32 %call, i32 addrspace(1)* %res, align 4 + ret void +} + +declare spir_func i32 @_Z29work_group_scan_inclusive_maxj(i32 noundef) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupUMax %[[#int]] %[[#ScopeWorkgroup]] ExclusiveScan +; CHECK-SPIRV: OpFunctionEnd + +;; kernel void testWorkGroupScanExclusiveUMax(uint a, global uint *res) { +;; res[0] = work_group_scan_exclusive_max(a); +;; } + +define dso_local spir_kernel void @testWorkGroupScanExclusiveUMax(i32 noundef %a, i32 addrspace(1)* nocapture noundef writeonly %res) local_unnamed_addr { +entry: + %call = call spir_func i32 @_Z29work_group_scan_exclusive_maxj(i32 noundef %a) + store i32 %call, i32 addrspace(1)* %res, align 4 + ret void +} + +declare spir_func i32 @_Z29work_group_scan_exclusive_maxj(i32 noundef) 
local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupUMin %[[#int]] %[[#ScopeWorkgroup]] Reduce +; CHECK-SPIRV: OpFunctionEnd + +;; kernel void testWorkGroupUMin(uint a, global uint *res) { +;; res[0] = work_group_reduce_min(a); +;; } + +define dso_local spir_kernel void @testWorkGroupUMin(i32 noundef %a, i32 addrspace(1)* nocapture noundef writeonly %res) local_unnamed_addr { +entry: + %call = call spir_func i32 @_Z21work_group_reduce_minj(i32 noundef %a) + store i32 %call, i32 addrspace(1)* %res, align 4 + ret void +} + +declare spir_func i32 @_Z21work_group_reduce_minj(i32 noundef) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#int]] %[[#ScopeWorkgroup]] +; CHECK-SPIRV: OpFunctionEnd + +;; kernel void testWorkGroupBroadcast(uint a, global size_t *id, global int *res) { +;; res[0] = work_group_broadcast(a, *id); +;; } + +define dso_local spir_kernel void @testWorkGroupBroadcast(i32 noundef %a, i32 addrspace(1)* nocapture noundef readonly %id, i32 addrspace(1)* nocapture noundef writeonly %res) local_unnamed_addr { +entry: + %0 = load i32, i32 addrspace(1)* %id, align 4 + %call = call spir_func i32 @_Z20work_group_broadcastjj(i32 noundef %a, i32 noundef %0) + store i32 %call, i32 addrspace(1)* %res, align 4 + ret void +} + +declare spir_func i32 @_Z20work_group_broadcastjj(i32 noundef, i32 noundef) local_unnamed_addr diff --git a/llvm/test/CodeGen/SPIRV/transcoding/ldexp.ll b/llvm/test/CodeGen/SPIRV/transcoding/ldexp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/transcoding/ldexp.ll @@ -0,0 +1,57 @@ +;; Check that backend converts scalar arg to vector for ldexp math instructions + +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV + +;; #pragma OPENCL EXTENSION cl_khr_fp16 : enable +;; #pragma OPENCL EXTENSION cl_khr_fp64 : enable + +;; __kernel void test_kernel_half(half3 x, int k, __global half3* ret) { +;; *ret = ldexp(x, k); +;; } + +; CHECK-SPIRV: %{{.*}} ldexp + +define dso_local spir_kernel void @test_kernel_half(<3 x half> noundef %x, i32 noundef %k, <3 x half> addrspace(1)* nocapture noundef writeonly %ret) local_unnamed_addr { +entry: + %call = call spir_func <3 x half> @_Z5ldexpDv3_Dhi(<3 x half> noundef %x, i32 noundef %k) + %extractVec2 = shufflevector <3 x half> %call, <3 x half> poison, <4 x i32> + %storetmp3 = bitcast <3 x half> addrspace(1)* %ret to <4 x half> addrspace(1)* + store <4 x half> %extractVec2, <4 x half> addrspace(1)* %storetmp3, align 8 + ret void +} + +declare spir_func <3 x half> @_Z5ldexpDv3_Dhi(<3 x half> noundef, i32 noundef) local_unnamed_addr + +;; __kernel void test_kernel_float(float3 x, int k, __global float3* ret) { +;; *ret = ldexp(x, k); +;; } + +; CHECK-SPIRV: %{{.*}} ldexp + +define dso_local spir_kernel void @test_kernel_float(<3 x float> noundef %x, i32 noundef %k, <3 x float> addrspace(1)* nocapture noundef writeonly %ret) local_unnamed_addr { +entry: + %call = call spir_func <3 x float> @_Z5ldexpDv3_fi(<3 x float> noundef %x, i32 noundef %k) + %extractVec2 = shufflevector <3 x float> %call, <3 x float> poison, <4 x i32> + %storetmp3 = bitcast <3 x float> addrspace(1)* %ret to <4 x float> addrspace(1)* + store <4 x float> %extractVec2, <4 x float> addrspace(1)* %storetmp3, align 16 + ret void +} + +declare spir_func <3 x float> @_Z5ldexpDv3_fi(<3 x float> noundef, i32 noundef) local_unnamed_addr + +;; __kernel void test_kernel_double(double3 x, int k, __global double3* ret) { +;; *ret = ldexp(x, k); 
+;; }
+
+; CHECK-SPIRV: %{{.*}} ldexp
+
+define dso_local spir_kernel void @test_kernel_double(<3 x double> noundef %x, i32 noundef %k, <3 x double> addrspace(1)* nocapture noundef writeonly %ret) local_unnamed_addr {
+entry:
+ %call = call spir_func <3 x double> @_Z5ldexpDv3_di(<3 x double> noundef %x, i32 noundef %k)
+ %extractVec2 = shufflevector <3 x double> %call, <3 x double> poison, <4 x i32>
+ %storetmp3 = bitcast <3 x double> addrspace(1)* %ret to <4 x double> addrspace(1)*
+ store <4 x double> %extractVec2, <4 x double> addrspace(1)* %storetmp3, align 32
+ ret void
+}
+
+declare spir_func <3 x double> @_Z5ldexpDv3_di(<3 x double> noundef, i32 noundef) local_unnamed_addr
diff --git a/llvm/test/CodeGen/SPIRV/transcoding/relationals_double.ll b/llvm/test/CodeGen/SPIRV/transcoding/relationals_double.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/transcoding/relationals_double.ll
@@ -0,0 +1,161 @@
+; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
+
+;; This test checks the following SYCL relational builtins with double and double2
+;; types:
+;; isfinite, isinf, isnan, isnormal, signbit, isequal, isnotequal, isgreater,
+;; isgreaterequal, isless, islessequal, islessgreater, isordered, isunordered
+
+; CHECK-SPIRV: %[[#BoolTypeID:]] = OpTypeBool
+; CHECK-SPIRV: %[[#BoolVectorTypeID:]] = OpTypeVector %[[#BoolTypeID]] 2
+
+; CHECK-SPIRV: OpIsFinite %[[#BoolTypeID]]
+; CHECK-SPIRV: OpIsInf %[[#BoolTypeID]]
+; CHECK-SPIRV: OpIsNan %[[#BoolTypeID]]
+; CHECK-SPIRV: OpIsNormal %[[#BoolTypeID]]
+; CHECK-SPIRV: OpSignBitSet %[[#BoolTypeID]]
+; CHECK-SPIRV: OpFOrdEqual %[[#BoolTypeID]]
+; CHECK-SPIRV: OpFUnordNotEqual %[[#BoolTypeID]]
+; CHECK-SPIRV: OpFOrdGreaterThan %[[#BoolTypeID]]
+; CHECK-SPIRV: OpFOrdGreaterThanEqual %[[#BoolTypeID]]
+; CHECK-SPIRV: OpFOrdLessThan %[[#BoolTypeID]]
+; CHECK-SPIRV: OpFOrdLessThanEqual %[[#BoolTypeID]]
+; CHECK-SPIRV: OpFOrdNotEqual %[[#BoolTypeID]]
+; CHECK-SPIRV: OpOrdered %[[#BoolTypeID]]
+; CHECK-SPIRV: OpUnordered %[[#BoolTypeID]]
+
+; CHECK-SPIRV: OpIsFinite %[[#BoolVectorTypeID]]
+; CHECK-SPIRV: OpIsInf %[[#BoolVectorTypeID]]
+; CHECK-SPIRV: OpIsNan %[[#BoolVectorTypeID]]
+; CHECK-SPIRV: OpIsNormal %[[#BoolVectorTypeID]]
+; CHECK-SPIRV: OpSignBitSet %[[#BoolVectorTypeID]]
+; CHECK-SPIRV: OpFOrdEqual %[[#BoolVectorTypeID]]
+; CHECK-SPIRV: OpFUnordNotEqual %[[#BoolVectorTypeID]]
+; CHECK-SPIRV: OpFOrdGreaterThan %[[#BoolVectorTypeID]]
+; CHECK-SPIRV: OpFOrdGreaterThanEqual %[[#BoolVectorTypeID]]
+; CHECK-SPIRV: OpFOrdLessThan %[[#BoolVectorTypeID]]
+; CHECK-SPIRV: OpFOrdLessThanEqual %[[#BoolVectorTypeID]]
+; CHECK-SPIRV: OpFOrdNotEqual %[[#BoolVectorTypeID]]
+; CHECK-SPIRV: OpOrdered %[[#BoolVectorTypeID]]
+; CHECK-SPIRV: OpUnordered %[[#BoolVectorTypeID]]
+
+define dso_local spir_func void @test_scalar(i32 addrspace(4)* nocapture writeonly %out, double %d) local_unnamed_addr {
+entry:
+ %call = tail call spir_func i32 @_Z8isfinited(double %d)
+ %call1 = tail call spir_func i32 @_Z5isinfd(double %d)
+ %add = add nsw i32 %call1, %call
+ %call2 = tail call spir_func i32 @_Z5isnand(double %d)
+ %add3 = add nsw i32 %add, %call2
+ %call4 = tail call spir_func i32 @_Z8isnormald(double %d)
+ %add5 = add nsw i32 %add3, %call4
+ %call6 = tail call spir_func i32 @_Z7signbitd(double %d)
+ %add7 = add nsw i32 %add5, %call6
+ %call8 = tail call spir_func i32 @_Z7isequaldd(double %d, double %d)
+ %add9 = add nsw i32 %add7, %call8
+ %call10 = tail call spir_func i32 @_Z10isnotequaldd(double %d, double
%d) + %add11 = add nsw i32 %add9, %call10 + %call12 = tail call spir_func i32 @_Z9isgreaterdd(double %d, double %d) + %add13 = add nsw i32 %add11, %call12 + %call14 = tail call spir_func i32 @_Z14isgreaterequaldd(double %d, double %d) + %add15 = add nsw i32 %add13, %call14 + %call16 = tail call spir_func i32 @_Z6islessdd(double %d, double %d) + %add17 = add nsw i32 %add15, %call16 + %call18 = tail call spir_func i32 @_Z11islessequaldd(double %d, double %d) + %add19 = add nsw i32 %add17, %call18 + %call20 = tail call spir_func i32 @_Z13islessgreaterdd(double %d, double %d) + %add21 = add nsw i32 %add19, %call20 + %call22 = tail call spir_func i32 @_Z9isordereddd(double %d, double %d) + %add23 = add nsw i32 %add21, %call22 + %call24 = tail call spir_func i32 @_Z11isunordereddd(double %d, double %d) + %add25 = add nsw i32 %add23, %call24 + store i32 %add25, i32 addrspace(4)* %out, align 4 + ret void +} + +declare spir_func i32 @_Z8isfinited(double) local_unnamed_addr + +declare spir_func i32 @_Z5isinfd(double) local_unnamed_addr + +declare spir_func i32 @_Z5isnand(double) local_unnamed_addr + +declare spir_func i32 @_Z8isnormald(double) local_unnamed_addr + +declare spir_func i32 @_Z7signbitd(double) local_unnamed_addr + +declare spir_func i32 @_Z7isequaldd(double, double) local_unnamed_addr + +declare spir_func i32 @_Z10isnotequaldd(double, double) local_unnamed_addr + +declare spir_func i32 @_Z9isgreaterdd(double, double) local_unnamed_addr + +declare spir_func i32 @_Z14isgreaterequaldd(double, double) local_unnamed_addr + +declare spir_func i32 @_Z6islessdd(double, double) local_unnamed_addr + +declare spir_func i32 @_Z11islessequaldd(double, double) local_unnamed_addr + +declare spir_func i32 @_Z13islessgreaterdd(double, double) local_unnamed_addr + +declare spir_func i32 @_Z9isordereddd(double, double) local_unnamed_addr + +declare spir_func i32 @_Z11isunordereddd(double, double) local_unnamed_addr + +define dso_local spir_func void @test_vector(<2 x i64> addrspace(4)* nocapture writeonly %out, <2 x double> %d) local_unnamed_addr { +entry: + %call = tail call spir_func <2 x i64> @_Z8isfiniteDv2_d(<2 x double> %d) + %call1 = tail call spir_func <2 x i64> @_Z5isinfDv2_d(<2 x double> %d) + %add = add <2 x i64> %call1, %call + %call2 = tail call spir_func <2 x i64> @_Z5isnanDv2_d(<2 x double> %d) + %add3 = add <2 x i64> %add, %call2 + %call4 = tail call spir_func <2 x i64> @_Z8isnormalDv2_d(<2 x double> %d) + %add5 = add <2 x i64> %add3, %call4 + %call6 = tail call spir_func <2 x i64> @_Z7signbitDv2_d(<2 x double> %d) + %add7 = add <2 x i64> %add5, %call6 + %call8 = tail call spir_func <2 x i64> @_Z7isequalDv2_dS_(<2 x double> %d, <2 x double> %d) + %add9 = add <2 x i64> %add7, %call8 + %call10 = tail call spir_func <2 x i64> @_Z10isnotequalDv2_dS_(<2 x double> %d, <2 x double> %d) + %add11 = add <2 x i64> %add9, %call10 + %call12 = tail call spir_func <2 x i64> @_Z9isgreaterDv2_dS_(<2 x double> %d, <2 x double> %d) + %add13 = add <2 x i64> %add11, %call12 + %call14 = tail call spir_func <2 x i64> @_Z14isgreaterequalDv2_dS_(<2 x double> %d, <2 x double> %d) + %add15 = add <2 x i64> %add13, %call14 + %call16 = tail call spir_func <2 x i64> @_Z6islessDv2_dS_(<2 x double> %d, <2 x double> %d) + %add17 = add <2 x i64> %add15, %call16 + %call18 = tail call spir_func <2 x i64> @_Z11islessequalDv2_dS_(<2 x double> %d, <2 x double> %d) + %add19 = add <2 x i64> %add17, %call18 + %call20 = tail call spir_func <2 x i64> @_Z13islessgreaterDv2_dS_(<2 x double> %d, <2 x double> %d) + %add21 = add <2 x 
i64> %add19, %call20
+ %call22 = tail call spir_func <2 x i64> @_Z9isorderedDv2_dS_(<2 x double> %d, <2 x double> %d)
+ %add23 = add <2 x i64> %add21, %call22
+ %call24 = tail call spir_func <2 x i64> @_Z11isunorderedDv2_dS_(<2 x double> %d, <2 x double> %d)
+ %add25 = add <2 x i64> %add23, %call24
+ store <2 x i64> %add25, <2 x i64> addrspace(4)* %out, align 16
+ ret void
+}
+
+declare spir_func <2 x i64> @_Z8isfiniteDv2_d(<2 x double>) local_unnamed_addr
+
+declare spir_func <2 x i64> @_Z5isinfDv2_d(<2 x double>) local_unnamed_addr
+
+declare spir_func <2 x i64> @_Z5isnanDv2_d(<2 x double>) local_unnamed_addr
+
+declare spir_func <2 x i64> @_Z8isnormalDv2_d(<2 x double>) local_unnamed_addr
+
+declare spir_func <2 x i64> @_Z7signbitDv2_d(<2 x double>) local_unnamed_addr
+
+declare spir_func <2 x i64> @_Z7isequalDv2_dS_(<2 x double>, <2 x double>) local_unnamed_addr
+
+declare spir_func <2 x i64> @_Z10isnotequalDv2_dS_(<2 x double>, <2 x double>) local_unnamed_addr
+
+declare spir_func <2 x i64> @_Z9isgreaterDv2_dS_(<2 x double>, <2 x double>) local_unnamed_addr
+
+declare spir_func <2 x i64> @_Z14isgreaterequalDv2_dS_(<2 x double>, <2 x double>) local_unnamed_addr
+
+declare spir_func <2 x i64> @_Z6islessDv2_dS_(<2 x double>, <2 x double>) local_unnamed_addr
+
+declare spir_func <2 x i64> @_Z11islessequalDv2_dS_(<2 x double>, <2 x double>) local_unnamed_addr
+
+declare spir_func <2 x i64> @_Z13islessgreaterDv2_dS_(<2 x double>, <2 x double>) local_unnamed_addr
+
+declare spir_func <2 x i64> @_Z9isorderedDv2_dS_(<2 x double>, <2 x double>) local_unnamed_addr
+
+declare spir_func <2 x i64> @_Z11isunorderedDv2_dS_(<2 x double>, <2 x double>) local_unnamed_addr
diff --git a/llvm/test/CodeGen/SPIRV/transcoding/relationals_float.ll b/llvm/test/CodeGen/SPIRV/transcoding/relationals_float.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/transcoding/relationals_float.ll
@@ -0,0 +1,161 @@
+; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
+
+;; This test checks the following SYCL relational builtins with float and float2
+;; types:
+;; isfinite, isinf, isnan, isnormal, signbit, isequal, isnotequal, isgreater,
+;; isgreaterequal, isless, islessequal, islessgreater, isordered, isunordered
+
+; CHECK-SPIRV: %[[#BoolTypeID:]] = OpTypeBool
+; CHECK-SPIRV: %[[#BoolVectorTypeID:]] = OpTypeVector %[[#BoolTypeID]] 2
+
+; CHECK-SPIRV: OpIsFinite %[[#BoolTypeID]]
+; CHECK-SPIRV: OpIsInf %[[#BoolTypeID]]
+; CHECK-SPIRV: OpIsNan %[[#BoolTypeID]]
+; CHECK-SPIRV: OpIsNormal %[[#BoolTypeID]]
+; CHECK-SPIRV: OpSignBitSet %[[#BoolTypeID]]
+; CHECK-SPIRV: OpFOrdEqual %[[#BoolTypeID]]
+; CHECK-SPIRV: OpFUnordNotEqual %[[#BoolTypeID]]
+; CHECK-SPIRV: OpFOrdGreaterThan %[[#BoolTypeID]]
+; CHECK-SPIRV: OpFOrdGreaterThanEqual %[[#BoolTypeID]]
+; CHECK-SPIRV: OpFOrdLessThan %[[#BoolTypeID]]
+; CHECK-SPIRV: OpFOrdLessThanEqual %[[#BoolTypeID]]
+; CHECK-SPIRV: OpFOrdNotEqual %[[#BoolTypeID]]
+; CHECK-SPIRV: OpOrdered %[[#BoolTypeID]]
+; CHECK-SPIRV: OpUnordered %[[#BoolTypeID]]
+
+; CHECK-SPIRV: OpIsFinite %[[#BoolVectorTypeID]]
+; CHECK-SPIRV: OpIsInf %[[#BoolVectorTypeID]]
+; CHECK-SPIRV: OpIsNan %[[#BoolVectorTypeID]]
+; CHECK-SPIRV: OpIsNormal %[[#BoolVectorTypeID]]
+; CHECK-SPIRV: OpSignBitSet %[[#BoolVectorTypeID]]
+; CHECK-SPIRV: OpFOrdEqual %[[#BoolVectorTypeID]]
+; CHECK-SPIRV: OpFUnordNotEqual %[[#BoolVectorTypeID]]
+; CHECK-SPIRV: OpFOrdGreaterThan %[[#BoolVectorTypeID]]
+; CHECK-SPIRV: OpFOrdGreaterThanEqual %[[#BoolVectorTypeID]]
+; CHECK-SPIRV:
OpFOrdLessThan %[[#BoolVectorTypeID]] +; CHECK-SPIRV: OpFOrdLessThanEqual %[[#BoolVectorTypeID]] +; CHECK-SPIRV: OpFOrdNotEqual %[[#BoolVectorTypeID]] +; CHECK-SPIRV: OpOrdered %[[#BoolVectorTypeID]] +; CHECK-SPIRV: OpUnordered %[[#BoolVectorTypeID]] + +define dso_local spir_func void @test_scalar(i32 addrspace(4)* nocapture writeonly %out, float %f) local_unnamed_addr { +entry: + %call = tail call spir_func i32 @_Z8isfinitef(float %f) + %call1 = tail call spir_func i32 @_Z5isinff(float %f) + %add = add nsw i32 %call1, %call + %call2 = tail call spir_func i32 @_Z5isnanf(float %f) + %add3 = add nsw i32 %add, %call2 + %call4 = tail call spir_func i32 @_Z8isnormalf(float %f) + %add5 = add nsw i32 %add3, %call4 + %call6 = tail call spir_func i32 @_Z7signbitf(float %f) + %add7 = add nsw i32 %add5, %call6 + %call8 = tail call spir_func i32 @_Z7isequalff(float %f, float %f) + %add9 = add nsw i32 %add7, %call8 + %call10 = tail call spir_func i32 @_Z10isnotequalff(float %f, float %f) + %add11 = add nsw i32 %add9, %call10 + %call12 = tail call spir_func i32 @_Z9isgreaterff(float %f, float %f) + %add13 = add nsw i32 %add11, %call12 + %call14 = tail call spir_func i32 @_Z14isgreaterequalff(float %f, float %f) + %add15 = add nsw i32 %add13, %call14 + %call16 = tail call spir_func i32 @_Z6islessff(float %f, float %f) + %add17 = add nsw i32 %add15, %call16 + %call18 = tail call spir_func i32 @_Z11islessequalff(float %f, float %f) + %add19 = add nsw i32 %add17, %call18 + %call20 = tail call spir_func i32 @_Z13islessgreaterff(float %f, float %f) + %add21 = add nsw i32 %add19, %call20 + %call22 = tail call spir_func i32 @_Z9isorderedff(float %f, float %f) + %add23 = add nsw i32 %add21, %call22 + %call24 = tail call spir_func i32 @_Z11isunorderedff(float %f, float %f) + %add25 = add nsw i32 %add23, %call24 + store i32 %add25, i32 addrspace(4)* %out, align 4 + ret void +} + +declare spir_func i32 @_Z8isfinitef(float) local_unnamed_addr + +declare spir_func i32 @_Z5isinff(float) local_unnamed_addr + +declare spir_func i32 @_Z5isnanf(float) local_unnamed_addr + +declare spir_func i32 @_Z8isnormalf(float) local_unnamed_addr + +declare spir_func i32 @_Z7signbitf(float) local_unnamed_addr + +declare spir_func i32 @_Z7isequalff(float, float) local_unnamed_addr + +declare spir_func i32 @_Z10isnotequalff(float, float) local_unnamed_addr + +declare spir_func i32 @_Z9isgreaterff(float, float) local_unnamed_addr + +declare spir_func i32 @_Z14isgreaterequalff(float, float) local_unnamed_addr + +declare spir_func i32 @_Z6islessff(float, float) local_unnamed_addr + +declare spir_func i32 @_Z11islessequalff(float, float) local_unnamed_addr + +declare spir_func i32 @_Z13islessgreaterff(float, float) local_unnamed_addr + +declare spir_func i32 @_Z9isorderedff(float, float) local_unnamed_addr + +declare spir_func i32 @_Z11isunorderedff(float, float) local_unnamed_addr + +define dso_local spir_func void @test_vector(<2 x i32> addrspace(4)* nocapture writeonly %out, <2 x float> %f) local_unnamed_addr { +entry: + %call = tail call spir_func <2 x i32> @_Z8isfiniteDv2_f(<2 x float> %f) + %call1 = tail call spir_func <2 x i32> @_Z5isinfDv2_f(<2 x float> %f) + %add = add <2 x i32> %call1, %call + %call2 = tail call spir_func <2 x i32> @_Z5isnanDv2_f(<2 x float> %f) + %add3 = add <2 x i32> %add, %call2 + %call4 = tail call spir_func <2 x i32> @_Z8isnormalDv2_f(<2 x float> %f) + %add5 = add <2 x i32> %add3, %call4 + %call6 = tail call spir_func <2 x i32> @_Z7signbitDv2_f(<2 x float> %f) + %add7 = add <2 x i32> %add5, %call6 + %call8 = 
tail call spir_func <2 x i32> @_Z7isequalDv2_fS_(<2 x float> %f, <2 x float> %f)
+ %add9 = add <2 x i32> %add7, %call8
+ %call10 = tail call spir_func <2 x i32> @_Z10isnotequalDv2_fS_(<2 x float> %f, <2 x float> %f)
+ %add11 = add <2 x i32> %add9, %call10
+ %call12 = tail call spir_func <2 x i32> @_Z9isgreaterDv2_fS_(<2 x float> %f, <2 x float> %f)
+ %add13 = add <2 x i32> %add11, %call12
+ %call14 = tail call spir_func <2 x i32> @_Z14isgreaterequalDv2_fS_(<2 x float> %f, <2 x float> %f)
+ %add15 = add <2 x i32> %add13, %call14
+ %call16 = tail call spir_func <2 x i32> @_Z6islessDv2_fS_(<2 x float> %f, <2 x float> %f)
+ %add17 = add <2 x i32> %add15, %call16
+ %call18 = tail call spir_func <2 x i32> @_Z11islessequalDv2_fS_(<2 x float> %f, <2 x float> %f)
+ %add19 = add <2 x i32> %add17, %call18
+ %call20 = tail call spir_func <2 x i32> @_Z13islessgreaterDv2_fS_(<2 x float> %f, <2 x float> %f)
+ %add21 = add <2 x i32> %add19, %call20
+ %call22 = tail call spir_func <2 x i32> @_Z9isorderedDv2_fS_(<2 x float> %f, <2 x float> %f)
+ %add23 = add <2 x i32> %add21, %call22
+ %call24 = tail call spir_func <2 x i32> @_Z11isunorderedDv2_fS_(<2 x float> %f, <2 x float> %f)
+ %add25 = add <2 x i32> %add23, %call24
+ store <2 x i32> %add25, <2 x i32> addrspace(4)* %out, align 8
+ ret void
+}
+
+declare spir_func <2 x i32> @_Z8isfiniteDv2_f(<2 x float>) local_unnamed_addr
+
+declare spir_func <2 x i32> @_Z5isinfDv2_f(<2 x float>) local_unnamed_addr
+
+declare spir_func <2 x i32> @_Z5isnanDv2_f(<2 x float>) local_unnamed_addr
+
+declare spir_func <2 x i32> @_Z8isnormalDv2_f(<2 x float>) local_unnamed_addr
+
+declare spir_func <2 x i32> @_Z7signbitDv2_f(<2 x float>) local_unnamed_addr
+
+declare spir_func <2 x i32> @_Z7isequalDv2_fS_(<2 x float>, <2 x float>) local_unnamed_addr
+
+declare spir_func <2 x i32> @_Z10isnotequalDv2_fS_(<2 x float>, <2 x float>) local_unnamed_addr
+
+declare spir_func <2 x i32> @_Z9isgreaterDv2_fS_(<2 x float>, <2 x float>) local_unnamed_addr
+
+declare spir_func <2 x i32> @_Z14isgreaterequalDv2_fS_(<2 x float>, <2 x float>) local_unnamed_addr
+
+declare spir_func <2 x i32> @_Z6islessDv2_fS_(<2 x float>, <2 x float>) local_unnamed_addr
+
+declare spir_func <2 x i32> @_Z11islessequalDv2_fS_(<2 x float>, <2 x float>) local_unnamed_addr
+
+declare spir_func <2 x i32> @_Z13islessgreaterDv2_fS_(<2 x float>, <2 x float>) local_unnamed_addr
+
+declare spir_func <2 x i32> @_Z9isorderedDv2_fS_(<2 x float>, <2 x float>) local_unnamed_addr
+
+declare spir_func <2 x i32> @_Z11isunorderedDv2_fS_(<2 x float>, <2 x float>) local_unnamed_addr
diff --git a/llvm/test/CodeGen/SPIRV/transcoding/relationals_half.ll b/llvm/test/CodeGen/SPIRV/transcoding/relationals_half.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/transcoding/relationals_half.ll
@@ -0,0 +1,160 @@
+; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
+
+;; This test checks the following SYCL relational builtins with half and half2 types:
+;; isfinite, isinf, isnan, isnormal, signbit, isequal, isnotequal, isgreater,
+;; isgreaterequal, isless, islessequal, islessgreater, isordered, isunordered
+
+; CHECK-SPIRV: %[[#BoolTypeID:]] = OpTypeBool
+; CHECK-SPIRV: %[[#BoolVectorTypeID:]] = OpTypeVector %[[#BoolTypeID]] 2
+
+; CHECK-SPIRV: OpIsFinite %[[#BoolTypeID]]
+; CHECK-SPIRV: OpIsInf %[[#BoolTypeID]]
+; CHECK-SPIRV: OpIsNan %[[#BoolTypeID]]
+; CHECK-SPIRV: OpIsNormal %[[#BoolTypeID]]
+; CHECK-SPIRV: OpSignBitSet %[[#BoolTypeID]]
+; CHECK-SPIRV: OpFOrdEqual %[[#BoolTypeID]]
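+;; For reference, a minimal source-level sketch of the kind of code this IR is
+;; assumed to be generated from (illustrative only; the exact original source is
+;; not part of this patch). Each builtin result is summed so every call stays
+;; live, which is why the CHECK lines appear in this order:
+;;
+;;   void test_scalar(int *out, half h) {
+;;     *out = isfinite(h) + isinf(h) + isnan(h) + isnormal(h) + signbit(h)
+;;          + isequal(h, h) + isnotequal(h, h) + isgreater(h, h)
+;;          + isgreaterequal(h, h) + isless(h, h) + islessequal(h, h)
+;;          + islessgreater(h, h) + isordered(h, h) + isunordered(h, h);
+;;   }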
+; CHECK-SPIRV: OpFUnordNotEqual %[[#BoolTypeID]] +; CHECK-SPIRV: OpFOrdGreaterThan %[[#BoolTypeID]] +; CHECK-SPIRV: OpFOrdGreaterThanEqual %[[#BoolTypeID]] +; CHECK-SPIRV: OpFOrdLessThan %[[#BoolTypeID]] +; CHECK-SPIRV: OpFOrdLessThanEqual %[[#BoolTypeID]] +; CHECK-SPIRV: OpFOrdNotEqual %[[#BoolTypeID]] +; CHECK-SPIRV: OpOrdered %[[#BoolTypeID]] +; CHECK-SPIRV: OpUnordered %[[#BoolTypeID]] + +; CHECK-SPIRV: OpIsFinite %[[#BoolVectorTypeID]] +; CHECK-SPIRV: OpIsInf %[[#BoolVectorTypeID]] +; CHECK-SPIRV: OpIsNan %[[#BoolVectorTypeID]] +; CHECK-SPIRV: OpIsNormal %[[#BoolVectorTypeID]] +; CHECK-SPIRV: OpSignBitSet %[[#BoolVectorTypeID]] +; CHECK-SPIRV: OpFOrdEqual %[[#BoolVectorTypeID]] +; CHECK-SPIRV: OpFUnordNotEqual %[[#BoolVectorTypeID]] +; CHECK-SPIRV: OpFOrdGreaterThan %[[#BoolVectorTypeID]] +; CHECK-SPIRV: OpFOrdGreaterThanEqual %[[#BoolVectorTypeID]] +; CHECK-SPIRV: OpFOrdLessThan %[[#BoolVectorTypeID]] +; CHECK-SPIRV: OpFOrdLessThanEqual %[[#BoolVectorTypeID]] +; CHECK-SPIRV: OpFOrdNotEqual %[[#BoolVectorTypeID]] +; CHECK-SPIRV: OpOrdered %[[#BoolVectorTypeID]] +; CHECK-SPIRV: OpUnordered %[[#BoolVectorTypeID]] + +define dso_local spir_func void @test_scalar(i32 addrspace(4)* nocapture writeonly %out, half %h) local_unnamed_addr { +entry: + %call = tail call spir_func i32 @_Z8isfiniteDh(half %h) + %call1 = tail call spir_func i32 @_Z5isinfDh(half %h) + %add = add nsw i32 %call1, %call + %call2 = tail call spir_func i32 @_Z5isnanDh(half %h) + %add3 = add nsw i32 %add, %call2 + %call4 = tail call spir_func i32 @_Z8isnormalDh(half %h) + %add5 = add nsw i32 %add3, %call4 + %call6 = tail call spir_func i32 @_Z7signbitDh(half %h) + %add7 = add nsw i32 %add5, %call6 + %call8 = tail call spir_func i32 @_Z7isequalDhDh(half %h, half %h) + %add9 = add nsw i32 %add7, %call8 + %call10 = tail call spir_func i32 @_Z10isnotequalDhDh(half %h, half %h) + %add11 = add nsw i32 %add9, %call10 + %call12 = tail call spir_func i32 @_Z9isgreaterDhDh(half %h, half %h) + %add13 = add nsw i32 %add11, %call12 + %call14 = tail call spir_func i32 @_Z14isgreaterequalDhDh(half %h, half %h) + %add15 = add nsw i32 %add13, %call14 + %call16 = tail call spir_func i32 @_Z6islessDhDh(half %h, half %h) + %add17 = add nsw i32 %add15, %call16 + %call18 = tail call spir_func i32 @_Z11islessequalDhDh(half %h, half %h) + %add19 = add nsw i32 %add17, %call18 + %call20 = tail call spir_func i32 @_Z13islessgreaterDhDh(half %h, half %h) + %add21 = add nsw i32 %add19, %call20 + %call22 = tail call spir_func i32 @_Z9isorderedDhDh(half %h, half %h) + %add23 = add nsw i32 %add21, %call22 + %call24 = tail call spir_func i32 @_Z11isunorderedDhDh(half %h, half %h) + %add25 = add nsw i32 %add23, %call24 + store i32 %add25, i32 addrspace(4)* %out, align 4 + ret void +} + +declare spir_func i32 @_Z8isfiniteDh(half) local_unnamed_addr + +declare spir_func i32 @_Z5isinfDh(half) local_unnamed_addr + +declare spir_func i32 @_Z5isnanDh(half) local_unnamed_addr + +declare spir_func i32 @_Z8isnormalDh(half) local_unnamed_addr + +declare spir_func i32 @_Z7signbitDh(half) local_unnamed_addr + +declare spir_func i32 @_Z7isequalDhDh(half, half) local_unnamed_addr + +declare spir_func i32 @_Z10isnotequalDhDh(half, half) local_unnamed_addr + +declare spir_func i32 @_Z9isgreaterDhDh(half, half) local_unnamed_addr + +declare spir_func i32 @_Z14isgreaterequalDhDh(half, half) local_unnamed_addr + +declare spir_func i32 @_Z6islessDhDh(half, half) local_unnamed_addr + +declare spir_func i32 @_Z11islessequalDhDh(half, half) local_unnamed_addr + +declare 
spir_func i32 @_Z13islessgreaterDhDh(half, half) local_unnamed_addr + +declare spir_func i32 @_Z9isorderedDhDh(half, half) local_unnamed_addr + +declare spir_func i32 @_Z11isunorderedDhDh(half, half) local_unnamed_addr + +define dso_local spir_func void @test_vector(<2 x i16> addrspace(4)* nocapture writeonly %out, <2 x half> %h) local_unnamed_addr { +entry: + %call = tail call spir_func <2 x i16> @_Z8isfiniteDv2_Dh(<2 x half> %h) + %call1 = tail call spir_func <2 x i16> @_Z5isinfDv2_Dh(<2 x half> %h) + %add = add <2 x i16> %call1, %call + %call2 = tail call spir_func <2 x i16> @_Z5isnanDv2_Dh(<2 x half> %h) + %add3 = add <2 x i16> %add, %call2 + %call4 = tail call spir_func <2 x i16> @_Z8isnormalDv2_Dh(<2 x half> %h) + %add5 = add <2 x i16> %add3, %call4 + %call6 = tail call spir_func <2 x i16> @_Z7signbitDv2_Dh(<2 x half> %h) + %add7 = add <2 x i16> %add5, %call6 + %call8 = tail call spir_func <2 x i16> @_Z7isequalDv2_DhS_(<2 x half> %h, <2 x half> %h) + %add9 = add <2 x i16> %add7, %call8 + %call10 = tail call spir_func <2 x i16> @_Z10isnotequalDv2_DhS_(<2 x half> %h, <2 x half> %h) + %add11 = add <2 x i16> %add9, %call10 + %call12 = tail call spir_func <2 x i16> @_Z9isgreaterDv2_DhS_(<2 x half> %h, <2 x half> %h) + %add13 = add <2 x i16> %add11, %call12 + %call14 = tail call spir_func <2 x i16> @_Z14isgreaterequalDv2_DhS_(<2 x half> %h, <2 x half> %h) + %add15 = add <2 x i16> %add13, %call14 + %call16 = tail call spir_func <2 x i16> @_Z6islessDv2_DhS_(<2 x half> %h, <2 x half> %h) + %add17 = add <2 x i16> %add15, %call16 + %call18 = tail call spir_func <2 x i16> @_Z11islessequalDv2_DhS_(<2 x half> %h, <2 x half> %h) + %add19 = add <2 x i16> %add17, %call18 + %call20 = tail call spir_func <2 x i16> @_Z13islessgreaterDv2_DhS_(<2 x half> %h, <2 x half> %h) + %add21 = add <2 x i16> %add19, %call20 + %call22 = tail call spir_func <2 x i16> @_Z9isorderedDv2_DhS_(<2 x half> %h, <2 x half> %h) + %add23 = add <2 x i16> %add21, %call22 + %call24 = tail call spir_func <2 x i16> @_Z11isunorderedDv2_DhS_(<2 x half> %h, <2 x half> %h) + %add25 = add <2 x i16> %add23, %call24 + store <2 x i16> %add25, <2 x i16> addrspace(4)* %out, align 4 + ret void +} + +declare spir_func <2 x i16> @_Z8isfiniteDv2_Dh(<2 x half>) local_unnamed_addr + +declare spir_func <2 x i16> @_Z5isinfDv2_Dh(<2 x half>) local_unnamed_addr + +declare spir_func <2 x i16> @_Z5isnanDv2_Dh(<2 x half>) local_unnamed_addr + +declare spir_func <2 x i16> @_Z8isnormalDv2_Dh(<2 x half>) local_unnamed_addr + +declare spir_func <2 x i16> @_Z7signbitDv2_Dh(<2 x half>) local_unnamed_addr + +declare spir_func <2 x i16> @_Z7isequalDv2_DhS_(<2 x half>, <2 x half>) local_unnamed_addr + +declare spir_func <2 x i16> @_Z10isnotequalDv2_DhS_(<2 x half>, <2 x half>) local_unnamed_addr + +declare spir_func <2 x i16> @_Z9isgreaterDv2_DhS_(<2 x half>, <2 x half>) local_unnamed_addr + +declare spir_func <2 x i16> @_Z14isgreaterequalDv2_DhS_(<2 x half>, <2 x half>) local_unnamed_addr + +declare spir_func <2 x i16> @_Z6islessDv2_DhS_(<2 x half>, <2 x half>) local_unnamed_addr + +declare spir_func <2 x i16> @_Z11islessequalDv2_DhS_(<2 x half>, <2 x half>) local_unnamed_addr + +declare spir_func <2 x i16> @_Z13islessgreaterDv2_DhS_(<2 x half>, <2 x half>) local_unnamed_addr + +declare spir_func <2 x i16> @_Z9isorderedDv2_DhS_(<2 x half>, <2 x half>) local_unnamed_addr + +declare spir_func <2 x i16> @_Z11isunorderedDv2_DhS_(<2 x half>, <2 x half>) local_unnamed_addr diff --git a/llvm/test/CodeGen/SPIRV/transcoding/spec_const.ll 
b/llvm/test/CodeGen/SPIRV/transcoding/spec_const.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/transcoding/spec_const.ll @@ -0,0 +1,61 @@ +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV + +; CHECK-SPIRV-NOT: OpCapability Matrix +; CHECK-SPIRV-NOT: OpCapability Shader +; CHECK-SPIRV: OpCapability Float16Buffer + +; CHECK-SPIRV-DAG: OpDecorate %[[#SC0:]] SpecId 0 +; CHECK-SPIRV-DAG: OpDecorate %[[#SC1:]] SpecId 1 +; CHECK-SPIRV-DAG: OpDecorate %[[#SC2:]] SpecId 2 +; CHECK-SPIRV-DAG: OpDecorate %[[#SC3:]] SpecId 3 +; CHECK-SPIRV-DAG: OpDecorate %[[#SC4:]] SpecId 4 +; CHECK-SPIRV-DAG: OpDecorate %[[#SC5:]] SpecId 5 +; CHECK-SPIRV-DAG: OpDecorate %[[#SC6:]] SpecId 6 +; CHECK-SPIRV-DAG: OpDecorate %[[#SC7:]] SpecId 7 + +; CHECK-SPIRV-DAG: %[[#SC0]] = OpSpecConstantFalse %[[#]] +; CHECK-SPIRV-DAG: %[[#SC1]] = OpSpecConstant %[[#]] 100 +; CHECK-SPIRV-DAG: %[[#SC2]] = OpSpecConstant %[[#]] 1 +; CHECK-SPIRV-DAG: %[[#SC3]] = OpSpecConstant %[[#]] 2 +; CHECK-SPIRV-DAG: %[[#SC4]] = OpSpecConstant %[[#]] 3 0 +; CHECK-SPIRV-DAG: %[[#SC5]] = OpSpecConstant %[[#]] 14336 +; CHECK-SPIRV-DAG: %[[#SC6]] = OpSpecConstant %[[#]] 1067450368 +; CHECK-SPIRV-DAG: %[[#SC7]] = OpSpecConstant %[[#]] 0 1073807360 + +define spir_kernel void @foo(i8 addrspace(1)* nocapture %b, i8 addrspace(1)* nocapture %c, i16 addrspace(1)* nocapture %s, i32 addrspace(1)* nocapture %i, i64 addrspace(1)* nocapture %l, half addrspace(1)* nocapture %h, float addrspace(1)* nocapture %f, double addrspace(1)* nocapture %d) local_unnamed_addr { +entry: + %0 = call i1 @_Z20__spirv_SpecConstantib(i32 0, i1 false) + %conv = zext i1 %0 to i8 + store i8 %conv, i8 addrspace(1)* %b, align 1 + + %1 = call i8 @_Z20__spirv_SpecConstantia(i32 1, i8 100) + store i8 %1, i8 addrspace(1)* %c, align 1 + + %2 = call i16 @_Z20__spirv_SpecConstantis(i32 2, i16 1) + store i16 %2, i16 addrspace(1)* %s, align 2 + + %3 = call i32 @_Z20__spirv_SpecConstantii(i32 3, i32 2) + store i32 %3, i32 addrspace(1)* %i, align 4 + + %4 = call i64 @_Z20__spirv_SpecConstantix(i32 4, i64 3) + store i64 %4, i64 addrspace(1)* %l, align 8 + + %5 = call half @_Z20__spirv_SpecConstantih(i32 5, half 0xH3800) + store half %5, half addrspace(1)* %h, align 2 + + %6 = call float @_Z20__spirv_SpecConstantif(i32 6, float 1.250000e+00) + store float %6, float addrspace(1)* %f, align 4 + + %7 = call double @_Z20__spirv_SpecConstantid(i32 7, double 2.125000e+00) + store double %7, double addrspace(1)* %d, align 8 + ret void +} + +declare i1 @_Z20__spirv_SpecConstantib(i32, i1) +declare i8 @_Z20__spirv_SpecConstantia(i32, i8) +declare i16 @_Z20__spirv_SpecConstantis(i32, i16) +declare i32 @_Z20__spirv_SpecConstantii(i32, i32) +declare i64 @_Z20__spirv_SpecConstantix(i32, i64) +declare half @_Z20__spirv_SpecConstantih(i32, half) +declare float @_Z20__spirv_SpecConstantif(i32, float) +declare double @_Z20__spirv_SpecConstantid(i32, double) diff --git a/llvm/test/CodeGen/SPIRV/transcoding/sub_group_ballot.ll b/llvm/test/CodeGen/SPIRV/transcoding/sub_group_ballot.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/transcoding/sub_group_ballot.ll @@ -0,0 +1,931 @@ +;; #pragma OPENCL EXTENSION cl_khr_subgroup_ballot : enable +;; #pragma OPENCL EXTENSION cl_khr_fp16 : enable +;; #pragma OPENCL EXTENSION cl_khr_fp64 : enable +;; +;; kernel void testNonUniformBroadcastChars() +;; { +;; char16 v = 0; +;; v.s0 = sub_group_non_uniform_broadcast(v.s0, 0); +;; v.s01 = sub_group_non_uniform_broadcast(v.s01, 0); +;; v.s012 = 
sub_group_non_uniform_broadcast(v.s012, 0); +;; v.s0123 = sub_group_non_uniform_broadcast(v.s0123, 0); +;; v.s01234567 = sub_group_non_uniform_broadcast(v.s01234567, 0); +;; v = sub_group_non_uniform_broadcast(v, 0); +;; v.s0 = sub_group_broadcast_first(v.s0); +;; } +;; +;; kernel void testNonUniformBroadcastUChars() +;; { +;; uchar16 v = 0; +;; v.s0 = sub_group_non_uniform_broadcast(v.s0, 0); +;; v.s01 = sub_group_non_uniform_broadcast(v.s01, 0); +;; v.s012 = sub_group_non_uniform_broadcast(v.s012, 0); +;; v.s0123 = sub_group_non_uniform_broadcast(v.s0123, 0); +;; v.s01234567 = sub_group_non_uniform_broadcast(v.s01234567, 0); +;; v = sub_group_non_uniform_broadcast(v, 0); +;; v.s0 = sub_group_broadcast_first(v.s0); +;; } +;; +;; kernel void testNonUniformBroadcastShorts() +;; { +;; short16 v = 0; +;; v.s0 = sub_group_non_uniform_broadcast(v.s0, 0); +;; v.s01 = sub_group_non_uniform_broadcast(v.s01, 0); +;; v.s012 = sub_group_non_uniform_broadcast(v.s012, 0); +;; v.s0123 = sub_group_non_uniform_broadcast(v.s0123, 0); +;; v.s01234567 = sub_group_non_uniform_broadcast(v.s01234567, 0); +;; v = sub_group_non_uniform_broadcast(v, 0); +;; v.s0 = sub_group_broadcast_first(v.s0); +;; } +;; +;; kernel void testNonUniformBroadcastUShorts() +;; { +;; ushort16 v = 0; +;; v.s0 = sub_group_non_uniform_broadcast(v.s0, 0); +;; v.s01 = sub_group_non_uniform_broadcast(v.s01, 0); +;; v.s012 = sub_group_non_uniform_broadcast(v.s012, 0); +;; v.s0123 = sub_group_non_uniform_broadcast(v.s0123, 0); +;; v.s01234567 = sub_group_non_uniform_broadcast(v.s01234567, 0); +;; v = sub_group_non_uniform_broadcast(v, 0); +;; v.s0 = sub_group_broadcast_first(v.s0); +;; } +;; +;; kernel void testNonUniformBroadcastInts() +;; { +;; int16 v = 0; +;; v.s0 = sub_group_non_uniform_broadcast(v.s0, 0); +;; v.s01 = sub_group_non_uniform_broadcast(v.s01, 0); +;; v.s012 = sub_group_non_uniform_broadcast(v.s012, 0); +;; v.s0123 = sub_group_non_uniform_broadcast(v.s0123, 0); +;; v.s01234567 = sub_group_non_uniform_broadcast(v.s01234567, 0); +;; v = sub_group_non_uniform_broadcast(v, 0); +;; v.s0 = sub_group_broadcast_first(v.s0); +;; } +;; +;; kernel void testNonUniformBroadcastUInts() +;; { +;; uint16 v = 0; +;; v.s0 = sub_group_non_uniform_broadcast(v.s0, 0); +;; v.s01 = sub_group_non_uniform_broadcast(v.s01, 0); +;; v.s012 = sub_group_non_uniform_broadcast(v.s012, 0); +;; v.s0123 = sub_group_non_uniform_broadcast(v.s0123, 0); +;; v.s01234567 = sub_group_non_uniform_broadcast(v.s01234567, 0); +;; v = sub_group_non_uniform_broadcast(v, 0); +;; v.s0 = sub_group_broadcast_first(v.s0); +;; } +;; +;; kernel void testNonUniformBroadcastLongs() +;; { +;; long16 v = 0; +;; v.s0 = sub_group_non_uniform_broadcast(v.s0, 0); +;; v.s01 = sub_group_non_uniform_broadcast(v.s01, 0); +;; v.s012 = sub_group_non_uniform_broadcast(v.s012, 0); +;; v.s0123 = sub_group_non_uniform_broadcast(v.s0123, 0); +;; v.s01234567 = sub_group_non_uniform_broadcast(v.s01234567, 0); +;; v = sub_group_non_uniform_broadcast(v, 0); +;; v.s0 = sub_group_broadcast_first(v.s0); +;; } +;; +;; kernel void testNonUniformBroadcastULongs() +;; { +;; ulong16 v = 0; +;; v.s0 = sub_group_non_uniform_broadcast(v.s0, 0); +;; v.s01 = sub_group_non_uniform_broadcast(v.s01, 0); +;; v.s012 = sub_group_non_uniform_broadcast(v.s012, 0); +;; v.s0123 = sub_group_non_uniform_broadcast(v.s0123, 0); +;; v.s01234567 = sub_group_non_uniform_broadcast(v.s01234567, 0); +;; v = sub_group_non_uniform_broadcast(v, 0); +;; v.s0 = sub_group_broadcast_first(v.s0); +;; } +;; +;; kernel void 
testNonUniformBroadcastFloats() +;; { +;; float16 v = 0; +;; v.s0 = sub_group_non_uniform_broadcast(v.s0, 0); +;; v.s01 = sub_group_non_uniform_broadcast(v.s01, 0); +;; v.s012 = sub_group_non_uniform_broadcast(v.s012, 0); +;; v.s0123 = sub_group_non_uniform_broadcast(v.s0123, 0); +;; v.s01234567 = sub_group_non_uniform_broadcast(v.s01234567, 0); +;; v = sub_group_non_uniform_broadcast(v, 0); +;; v.s0 = sub_group_broadcast_first(v.s0); +;; } +;; +;; kernel void testNonUniformBroadcastHalfs() +;; { +;; half16 v = 0; +;; v.s0 = sub_group_non_uniform_broadcast(v.s0, 0); +;; v.s01 = sub_group_non_uniform_broadcast(v.s01, 0); +;; v.s012 = sub_group_non_uniform_broadcast(v.s012, 0); +;; v.s0123 = sub_group_non_uniform_broadcast(v.s0123, 0); +;; v.s01234567 = sub_group_non_uniform_broadcast(v.s01234567, 0); +;; v = sub_group_non_uniform_broadcast(v, 0); +;; v.s0 = sub_group_broadcast_first(v.s0); +;; } +;; +;; kernel void testNonUniformBroadcastDoubles() +;; { +;; double16 v = 0; +;; v.s0 = sub_group_non_uniform_broadcast(v.s0, 0); +;; v.s01 = sub_group_non_uniform_broadcast(v.s01, 0); +;; v.s012 = sub_group_non_uniform_broadcast(v.s012, 0); +;; v.s0123 = sub_group_non_uniform_broadcast(v.s0123, 0); +;; v.s01234567 = sub_group_non_uniform_broadcast(v.s01234567, 0); +;; v = sub_group_non_uniform_broadcast(v, 0); +;; v.s0 = sub_group_broadcast_first(v.s0); +;; } +;; +;; kernel void testBallotOperations(global uint* dst) +;; { +;; uint4 v = sub_group_ballot(0); +;; dst[0] = sub_group_inverse_ballot(v); +;; dst[1] = sub_group_ballot_bit_extract(v, 0); +;; dst[2] = sub_group_ballot_bit_count(v); +;; dst[3] = sub_group_ballot_inclusive_scan(v); +;; dst[4] = sub_group_ballot_exclusive_scan(v); +;; dst[5] = sub_group_ballot_find_lsb(v); +;; dst[6] = sub_group_ballot_find_msb(v); +;; } +;; +;; kernel void testSubgroupMasks(global uint4* dst) +;; { +;; dst[0] = get_sub_group_eq_mask(); +;; dst[1] = get_sub_group_ge_mask(); +;; dst[2] = get_sub_group_gt_mask(); +;; dst[3] = get_sub_group_le_mask(); +;; dst[4] = get_sub_group_lt_mask(); +;; } + +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV + +; CHECK-SPIRV-DAG: OpCapability GroupNonUniformBallot + +; CHECK-SPIRV-DAG: OpDecorate %[[#eqMask:]] BuiltIn SubgroupEqMask +; CHECK-SPIRV-DAG: OpDecorate %[[#geMask:]] BuiltIn SubgroupGeMask +; CHECK-SPIRV-DAG: OpDecorate %[[#gtMask:]] BuiltIn SubgroupGtMask +; CHECK-SPIRV-DAG: OpDecorate %[[#leMask:]] BuiltIn SubgroupLeMask +; CHECK-SPIRV-DAG: OpDecorate %[[#ltMask:]] BuiltIn SubgroupLtMask + +; CHECK-SPIRV-DAG: %[[#bool:]] = OpTypeBool +; CHECK-SPIRV-DAG: %[[#char:]] = OpTypeInt 8 0 +; CHECK-SPIRV-DAG: %[[#short:]] = OpTypeInt 16 0 +; CHECK-SPIRV-DAG: %[[#int:]] = OpTypeInt 32 0 +; CHECK-SPIRV-DAG: %[[#long:]] = OpTypeInt 64 0 +; CHECK-SPIRV-DAG: %[[#half:]] = OpTypeFloat 16 +; CHECK-SPIRV-DAG: %[[#float:]] = OpTypeFloat 32 +; CHECK-SPIRV-DAG: %[[#double:]] = OpTypeFloat 64 + +; CHECK-SPIRV-DAG: %[[#char2:]] = OpTypeVector %[[#char]] 2 +; CHECK-SPIRV-DAG: %[[#char3:]] = OpTypeVector %[[#char]] 3 +; CHECK-SPIRV-DAG: %[[#char4:]] = OpTypeVector %[[#char]] 4 +; CHECK-SPIRV-DAG: %[[#char8:]] = OpTypeVector %[[#char]] 8 +; CHECK-SPIRV-DAG: %[[#char16:]] = OpTypeVector %[[#char]] 16 + +; CHECK-SPIRV-DAG: %[[#short2:]] = OpTypeVector %[[#short]] 2 +; CHECK-SPIRV-DAG: %[[#short3:]] = OpTypeVector %[[#short]] 3 +; CHECK-SPIRV-DAG: %[[#short4:]] = OpTypeVector %[[#short]] 4 +; CHECK-SPIRV-DAG: %[[#short8:]] = OpTypeVector %[[#short]] 8 +; CHECK-SPIRV-DAG: %[[#short16:]] = 
OpTypeVector %[[#short]] 16 + +; CHECK-SPIRV-DAG: %[[#int2:]] = OpTypeVector %[[#int]] 2 +; CHECK-SPIRV-DAG: %[[#int3:]] = OpTypeVector %[[#int]] 3 +; CHECK-SPIRV-DAG: %[[#int4:]] = OpTypeVector %[[#int]] 4 +; CHECK-SPIRV-DAG: %[[#int8:]] = OpTypeVector %[[#int]] 8 +; CHECK-SPIRV-DAG: %[[#int16:]] = OpTypeVector %[[#int]] 16 + +; CHECK-SPIRV-DAG: %[[#long2:]] = OpTypeVector %[[#long]] 2 +; CHECK-SPIRV-DAG: %[[#long3:]] = OpTypeVector %[[#long]] 3 +; CHECK-SPIRV-DAG: %[[#long4:]] = OpTypeVector %[[#long]] 4 +; CHECK-SPIRV-DAG: %[[#long8:]] = OpTypeVector %[[#long]] 8 +; CHECK-SPIRV-DAG: %[[#long16:]] = OpTypeVector %[[#long]] 16 + +; CHECK-SPIRV-DAG: %[[#float2:]] = OpTypeVector %[[#float]] 2 +; CHECK-SPIRV-DAG: %[[#float3:]] = OpTypeVector %[[#float]] 3 +; CHECK-SPIRV-DAG: %[[#float4:]] = OpTypeVector %[[#float]] 4 +; CHECK-SPIRV-DAG: %[[#float8:]] = OpTypeVector %[[#float]] 8 +; CHECK-SPIRV-DAG: %[[#float16:]] = OpTypeVector %[[#float]] 16 + +; CHECK-SPIRV-DAG: %[[#half2:]] = OpTypeVector %[[#half]] 2 +; CHECK-SPIRV-DAG: %[[#half3:]] = OpTypeVector %[[#half]] 3 +; CHECK-SPIRV-DAG: %[[#half4:]] = OpTypeVector %[[#half]] 4 +; CHECK-SPIRV-DAG: %[[#half8:]] = OpTypeVector %[[#half]] 8 +; CHECK-SPIRV-DAG: %[[#half16:]] = OpTypeVector %[[#half]] 16 + +; CHECK-SPIRV-DAG: %[[#double2:]] = OpTypeVector %[[#double]] 2 +; CHECK-SPIRV-DAG: %[[#double3:]] = OpTypeVector %[[#double]] 3 +; CHECK-SPIRV-DAG: %[[#double4:]] = OpTypeVector %[[#double]] 4 +; CHECK-SPIRV-DAG: %[[#double8:]] = OpTypeVector %[[#double]] 8 +; CHECK-SPIRV-DAG: %[[#double16:]] = OpTypeVector %[[#double]] 16 + +; CHECK-SPIRV-DAG: %[[#false:]] = OpConstantFalse %[[#bool]] +; CHECK-SPIRV-DAG: %[[#ScopeSubgroup:]] = OpConstant %[[#int]] 3 +; CHECK-SPIRV-DAG: %[[#char_0:]] = OpConstant %[[#char]] 0 +; CHECK-SPIRV-DAG: %[[#short_0:]] = OpConstant %[[#short]] 0 +; CHECK-SPIRV-DAG: %[[#int_0:]] = OpConstant %[[#int]] 0 +; CHECK-SPIRV-DAG: %[[#long_0:]] = OpConstantNull %[[#long]] +; CHECK-SPIRV-DAG: %[[#half_0:]] = OpConstant %[[#half]] 0 +; CHECK-SPIRV-DAG: %[[#float_0:]] = OpConstant %[[#float]] 0 +; CHECK-SPIRV-DAG: %[[#double_0:]] = OpConstant %[[#double]] 0 + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#char]] %[[#ScopeSubgroup]] %[[#char_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#char2_0:]] = OpVectorShuffle %[[#char2]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#char2]] %[[#ScopeSubgroup]] %[[#char2_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#char3_0:]] = OpVectorShuffle %[[#char3]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#char3]] %[[#ScopeSubgroup]] %[[#char3_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#char4_0:]] = OpVectorShuffle %[[#char4]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#char4]] %[[#ScopeSubgroup]] %[[#char4_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#char8_0:]] = OpVectorShuffle %[[#char8]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#char8]] %[[#ScopeSubgroup]] %[[#char8_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#char16]] +; CHECK-SPIRV: %[[#char16_0:]] = OpVectorShuffle %[[#char16]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#char16]] %[[#ScopeSubgroup]] %[[#char16_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#char_value:]] = OpCompositeExtract %[[#char]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcastFirst %[[#char]] %[[#ScopeSubgroup]] %[[#char_value]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testNonUniformBroadcastChars() local_unnamed_addr { + %1 = tail call spir_func signext i8 
@_Z31sub_group_non_uniform_broadcastcj(i8 signext 0, i32 0) + %2 = insertelement <16 x i8> , i8 %1, i64 0 + %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <2 x i32> + %4 = tail call spir_func <2 x i8> @_Z31sub_group_non_uniform_broadcastDv2_cj(<2 x i8> %3, i32 0) + %5 = shufflevector <2 x i8> %4, <2 x i8> undef, <16 x i32> + %6 = shufflevector <16 x i8> %5, <16 x i8> %2, <16 x i32> + %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <3 x i32> + %8 = tail call spir_func <3 x i8> @_Z31sub_group_non_uniform_broadcastDv3_cj(<3 x i8> %7, i32 0) + %9 = shufflevector <3 x i8> %8, <3 x i8> undef, <16 x i32> + %10 = shufflevector <16 x i8> %9, <16 x i8> %6, <16 x i32> + %11 = shufflevector <16 x i8> %10, <16 x i8> undef, <4 x i32> + %12 = tail call spir_func <4 x i8> @_Z31sub_group_non_uniform_broadcastDv4_cj(<4 x i8> %11, i32 0) + %13 = shufflevector <4 x i8> %12, <4 x i8> undef, <16 x i32> + %14 = shufflevector <16 x i8> %13, <16 x i8> %10, <16 x i32> + %15 = shufflevector <16 x i8> %14, <16 x i8> undef, <8 x i32> + %16 = tail call spir_func <8 x i8> @_Z31sub_group_non_uniform_broadcastDv8_cj(<8 x i8> %15, i32 0) + %17 = shufflevector <8 x i8> %16, <8 x i8> undef, <16 x i32> + %18 = shufflevector <16 x i8> %17, <16 x i8> %14, <16 x i32> + %19 = tail call spir_func <16 x i8> @_Z31sub_group_non_uniform_broadcastDv16_cj(<16 x i8> %18, i32 0) + %20 = extractelement <16 x i8> %19, i64 0 + %21 = tail call spir_func signext i8 @_Z25sub_group_broadcast_firstc(i8 signext %20) + ret void +} + +declare dso_local spir_func signext i8 @_Z31sub_group_non_uniform_broadcastcj(i8 signext, i32) local_unnamed_addr + +declare dso_local spir_func <2 x i8> @_Z31sub_group_non_uniform_broadcastDv2_cj(<2 x i8>, i32) local_unnamed_addr + +declare dso_local spir_func <3 x i8> @_Z31sub_group_non_uniform_broadcastDv3_cj(<3 x i8>, i32) local_unnamed_addr + +declare dso_local spir_func <4 x i8> @_Z31sub_group_non_uniform_broadcastDv4_cj(<4 x i8>, i32) local_unnamed_addr + +declare dso_local spir_func <8 x i8> @_Z31sub_group_non_uniform_broadcastDv8_cj(<8 x i8>, i32) local_unnamed_addr + +declare dso_local spir_func <16 x i8> @_Z31sub_group_non_uniform_broadcastDv16_cj(<16 x i8>, i32) local_unnamed_addr + +declare dso_local spir_func signext i8 @_Z25sub_group_broadcast_firstc(i8 signext) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#char]] %[[#ScopeSubgroup]] %[[#char_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#char2_0:]] = OpVectorShuffle %[[#char2]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#char2]] %[[#ScopeSubgroup]] %[[#char2_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#char3_0:]] = OpVectorShuffle %[[#char3]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#char3]] %[[#ScopeSubgroup]] %[[#char3_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#char4_0:]] = OpVectorShuffle %[[#char4]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#char4]] %[[#ScopeSubgroup]] %[[#char4_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#char8_0:]] = OpVectorShuffle %[[#char8]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#char8]] %[[#ScopeSubgroup]] %[[#char8_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#char16]] +; CHECK-SPIRV: %[[#char16_0:]] = OpVectorShuffle %[[#char16]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#char16]] %[[#ScopeSubgroup]] %[[#char16_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#char_value:]] = OpCompositeExtract %[[#char]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcastFirst %[[#char]] %[[#ScopeSubgroup]] %[[#char_value]] +; CHECK-SPIRV: 
OpFunctionEnd + +define dso_local spir_kernel void @testNonUniformBroadcastUChars() local_unnamed_addr { + %1 = tail call spir_func zeroext i8 @_Z31sub_group_non_uniform_broadcasthj(i8 zeroext 0, i32 0) + %2 = insertelement <16 x i8> , i8 %1, i64 0 + %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <2 x i32> + %4 = tail call spir_func <2 x i8> @_Z31sub_group_non_uniform_broadcastDv2_hj(<2 x i8> %3, i32 0) + %5 = shufflevector <2 x i8> %4, <2 x i8> undef, <16 x i32> + %6 = shufflevector <16 x i8> %5, <16 x i8> %2, <16 x i32> + %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <3 x i32> + %8 = tail call spir_func <3 x i8> @_Z31sub_group_non_uniform_broadcastDv3_hj(<3 x i8> %7, i32 0) + %9 = shufflevector <3 x i8> %8, <3 x i8> undef, <16 x i32> + %10 = shufflevector <16 x i8> %9, <16 x i8> %6, <16 x i32> + %11 = shufflevector <16 x i8> %10, <16 x i8> undef, <4 x i32> + %12 = tail call spir_func <4 x i8> @_Z31sub_group_non_uniform_broadcastDv4_hj(<4 x i8> %11, i32 0) + %13 = shufflevector <4 x i8> %12, <4 x i8> undef, <16 x i32> + %14 = shufflevector <16 x i8> %13, <16 x i8> %10, <16 x i32> + %15 = shufflevector <16 x i8> %14, <16 x i8> undef, <8 x i32> + %16 = tail call spir_func <8 x i8> @_Z31sub_group_non_uniform_broadcastDv8_hj(<8 x i8> %15, i32 0) + %17 = shufflevector <8 x i8> %16, <8 x i8> undef, <16 x i32> + %18 = shufflevector <16 x i8> %17, <16 x i8> %14, <16 x i32> + %19 = tail call spir_func <16 x i8> @_Z31sub_group_non_uniform_broadcastDv16_hj(<16 x i8> %18, i32 0) + %20 = extractelement <16 x i8> %19, i64 0 + %21 = tail call spir_func zeroext i8 @_Z25sub_group_broadcast_firsth(i8 zeroext %20) + ret void +} + +declare dso_local spir_func zeroext i8 @_Z31sub_group_non_uniform_broadcasthj(i8 zeroext, i32) local_unnamed_addr + +declare dso_local spir_func <2 x i8> @_Z31sub_group_non_uniform_broadcastDv2_hj(<2 x i8>, i32) local_unnamed_addr + +declare dso_local spir_func <3 x i8> @_Z31sub_group_non_uniform_broadcastDv3_hj(<3 x i8>, i32) local_unnamed_addr + +declare dso_local spir_func <4 x i8> @_Z31sub_group_non_uniform_broadcastDv4_hj(<4 x i8>, i32) local_unnamed_addr + +declare dso_local spir_func <8 x i8> @_Z31sub_group_non_uniform_broadcastDv8_hj(<8 x i8>, i32) local_unnamed_addr + +declare dso_local spir_func <16 x i8> @_Z31sub_group_non_uniform_broadcastDv16_hj(<16 x i8>, i32) local_unnamed_addr + +declare dso_local spir_func zeroext i8 @_Z25sub_group_broadcast_firsth(i8 zeroext) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#short]] %[[#ScopeSubgroup]] %[[#short_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#short2_0:]] = OpVectorShuffle %[[#short2]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#short2]] %[[#ScopeSubgroup]] %[[#short2_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#short3_0:]] = OpVectorShuffle %[[#short3]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#short3]] %[[#ScopeSubgroup]] %[[#short3_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#short4_0:]] = OpVectorShuffle %[[#short4]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#short4]] %[[#ScopeSubgroup]] %[[#short4_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#short8_0:]] = OpVectorShuffle %[[#short8]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#short8]] %[[#ScopeSubgroup]] %[[#short8_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#short16]] +; CHECK-SPIRV: %[[#short16_0:]] = OpVectorShuffle %[[#short16]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#short16]] %[[#ScopeSubgroup]] %[[#short16_0]] %[[#int_0]] +; CHECK-SPIRV: 
%[[#short_value:]] = OpCompositeExtract %[[#short]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcastFirst %[[#short]] %[[#ScopeSubgroup]] %[[#short_value]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testNonUniformBroadcastShorts() local_unnamed_addr { + %1 = tail call spir_func signext i16 @_Z31sub_group_non_uniform_broadcastsj(i16 signext 0, i32 0) + %2 = insertelement <16 x i16> , i16 %1, i64 0 + %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <2 x i32> + %4 = tail call spir_func <2 x i16> @_Z31sub_group_non_uniform_broadcastDv2_sj(<2 x i16> %3, i32 0) + %5 = shufflevector <2 x i16> %4, <2 x i16> undef, <16 x i32> + %6 = shufflevector <16 x i16> %5, <16 x i16> %2, <16 x i32> + %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <3 x i32> + %8 = tail call spir_func <3 x i16> @_Z31sub_group_non_uniform_broadcastDv3_sj(<3 x i16> %7, i32 0) + %9 = shufflevector <3 x i16> %8, <3 x i16> undef, <16 x i32> + %10 = shufflevector <16 x i16> %9, <16 x i16> %6, <16 x i32> + %11 = shufflevector <16 x i16> %10, <16 x i16> undef, <4 x i32> + %12 = tail call spir_func <4 x i16> @_Z31sub_group_non_uniform_broadcastDv4_sj(<4 x i16> %11, i32 0) + %13 = shufflevector <4 x i16> %12, <4 x i16> undef, <16 x i32> + %14 = shufflevector <16 x i16> %13, <16 x i16> %10, <16 x i32> + %15 = shufflevector <16 x i16> %14, <16 x i16> undef, <8 x i32> + %16 = tail call spir_func <8 x i16> @_Z31sub_group_non_uniform_broadcastDv8_sj(<8 x i16> %15, i32 0) + %17 = shufflevector <8 x i16> %16, <8 x i16> undef, <16 x i32> + %18 = shufflevector <16 x i16> %17, <16 x i16> %14, <16 x i32> + %19 = tail call spir_func <16 x i16> @_Z31sub_group_non_uniform_broadcastDv16_sj(<16 x i16> %18, i32 0) + %20 = extractelement <16 x i16> %19, i64 0 + %21 = tail call spir_func signext i16 @_Z25sub_group_broadcast_firsts(i16 signext %20) + ret void +} + +declare dso_local spir_func signext i16 @_Z31sub_group_non_uniform_broadcastsj(i16 signext, i32) local_unnamed_addr + +declare dso_local spir_func <2 x i16> @_Z31sub_group_non_uniform_broadcastDv2_sj(<2 x i16>, i32) local_unnamed_addr + +declare dso_local spir_func <3 x i16> @_Z31sub_group_non_uniform_broadcastDv3_sj(<3 x i16>, i32) local_unnamed_addr + +declare dso_local spir_func <4 x i16> @_Z31sub_group_non_uniform_broadcastDv4_sj(<4 x i16>, i32) local_unnamed_addr + +declare dso_local spir_func <8 x i16> @_Z31sub_group_non_uniform_broadcastDv8_sj(<8 x i16>, i32) local_unnamed_addr + +declare dso_local spir_func <16 x i16> @_Z31sub_group_non_uniform_broadcastDv16_sj(<16 x i16>, i32) local_unnamed_addr + +declare dso_local spir_func signext i16 @_Z25sub_group_broadcast_firsts(i16 signext) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#short]] %[[#ScopeSubgroup]] %[[#short_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#short2_0:]] = OpVectorShuffle %[[#short2]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#short2]] %[[#ScopeSubgroup]] %[[#short2_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#short3_0:]] = OpVectorShuffle %[[#short3]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#short3]] %[[#ScopeSubgroup]] %[[#short3_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#short4_0:]] = OpVectorShuffle %[[#short4]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#short4]] %[[#ScopeSubgroup]] %[[#short4_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#short8_0:]] = OpVectorShuffle %[[#short8]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#short8]] %[[#ScopeSubgroup]] %[[#short8_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#]] = 
OpVectorShuffle %[[#short16]] +; CHECK-SPIRV: %[[#short16_0:]] = OpVectorShuffle %[[#short16]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#short16]] %[[#ScopeSubgroup]] %[[#short16_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#short_value:]] = OpCompositeExtract %[[#short]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcastFirst %[[#short]] %[[#ScopeSubgroup]] %[[#short_value]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testNonUniformBroadcastUShorts() local_unnamed_addr { + %1 = tail call spir_func zeroext i16 @_Z31sub_group_non_uniform_broadcasttj(i16 zeroext 0, i32 0) + %2 = insertelement <16 x i16> , i16 %1, i64 0 + %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <2 x i32> + %4 = tail call spir_func <2 x i16> @_Z31sub_group_non_uniform_broadcastDv2_tj(<2 x i16> %3, i32 0) + %5 = shufflevector <2 x i16> %4, <2 x i16> undef, <16 x i32> + %6 = shufflevector <16 x i16> %5, <16 x i16> %2, <16 x i32> + %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <3 x i32> + %8 = tail call spir_func <3 x i16> @_Z31sub_group_non_uniform_broadcastDv3_tj(<3 x i16> %7, i32 0) + %9 = shufflevector <3 x i16> %8, <3 x i16> undef, <16 x i32> + %10 = shufflevector <16 x i16> %9, <16 x i16> %6, <16 x i32> + %11 = shufflevector <16 x i16> %10, <16 x i16> undef, <4 x i32> + %12 = tail call spir_func <4 x i16> @_Z31sub_group_non_uniform_broadcastDv4_tj(<4 x i16> %11, i32 0) + %13 = shufflevector <4 x i16> %12, <4 x i16> undef, <16 x i32> + %14 = shufflevector <16 x i16> %13, <16 x i16> %10, <16 x i32> + %15 = shufflevector <16 x i16> %14, <16 x i16> undef, <8 x i32> + %16 = tail call spir_func <8 x i16> @_Z31sub_group_non_uniform_broadcastDv8_tj(<8 x i16> %15, i32 0) + %17 = shufflevector <8 x i16> %16, <8 x i16> undef, <16 x i32> + %18 = shufflevector <16 x i16> %17, <16 x i16> %14, <16 x i32> + %19 = tail call spir_func <16 x i16> @_Z31sub_group_non_uniform_broadcastDv16_tj(<16 x i16> %18, i32 0) + %20 = extractelement <16 x i16> %19, i64 0 + %21 = tail call spir_func zeroext i16 @_Z25sub_group_broadcast_firstt(i16 zeroext %20) + ret void +} + +declare dso_local spir_func zeroext i16 @_Z31sub_group_non_uniform_broadcasttj(i16 zeroext, i32) local_unnamed_addr + +declare dso_local spir_func <2 x i16> @_Z31sub_group_non_uniform_broadcastDv2_tj(<2 x i16>, i32) local_unnamed_addr + +declare dso_local spir_func <3 x i16> @_Z31sub_group_non_uniform_broadcastDv3_tj(<3 x i16>, i32) local_unnamed_addr + +declare dso_local spir_func <4 x i16> @_Z31sub_group_non_uniform_broadcastDv4_tj(<4 x i16>, i32) local_unnamed_addr + +declare dso_local spir_func <8 x i16> @_Z31sub_group_non_uniform_broadcastDv8_tj(<8 x i16>, i32) local_unnamed_addr + +declare dso_local spir_func <16 x i16> @_Z31sub_group_non_uniform_broadcastDv16_tj(<16 x i16>, i32) local_unnamed_addr + +declare dso_local spir_func zeroext i16 @_Z25sub_group_broadcast_firstt(i16 zeroext) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#int]] %[[#ScopeSubgroup]] %[[#int_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#int2_0:]] = OpVectorShuffle %[[#int2]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#int2]] %[[#ScopeSubgroup]] %[[#int2_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#int3_0:]] = OpVectorShuffle %[[#int3]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#int3]] %[[#ScopeSubgroup]] %[[#int3_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#int4_0:]] = OpVectorShuffle %[[#int4]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#int4]] %[[#ScopeSubgroup]] %[[#int4_0]] %[[#int_0]] +; 
CHECK-SPIRV: %[[#int8_0:]] = OpVectorShuffle %[[#int8]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#int8]] %[[#ScopeSubgroup]] %[[#int8_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#int16]] +; CHECK-SPIRV: %[[#int16_0:]] = OpVectorShuffle %[[#int16]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#int16]] %[[#ScopeSubgroup]] %[[#int16_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#int_value:]] = OpCompositeExtract %[[#int]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcastFirst %[[#int]] %[[#ScopeSubgroup]] %[[#int_value]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testNonUniformBroadcastInts() local_unnamed_addr { + %1 = tail call spir_func i32 @_Z31sub_group_non_uniform_broadcastij(i32 0, i32 0) + %2 = insertelement <16 x i32> , i32 %1, i64 0 + %3 = shufflevector <16 x i32> %2, <16 x i32> undef, <2 x i32> + %4 = tail call spir_func <2 x i32> @_Z31sub_group_non_uniform_broadcastDv2_ij(<2 x i32> %3, i32 0) + %5 = shufflevector <2 x i32> %4, <2 x i32> undef, <16 x i32> + %6 = shufflevector <16 x i32> %5, <16 x i32> %2, <16 x i32> + %7 = shufflevector <16 x i32> %6, <16 x i32> undef, <3 x i32> + %8 = tail call spir_func <3 x i32> @_Z31sub_group_non_uniform_broadcastDv3_ij(<3 x i32> %7, i32 0) + %9 = shufflevector <3 x i32> %8, <3 x i32> undef, <16 x i32> + %10 = shufflevector <16 x i32> %9, <16 x i32> %6, <16 x i32> + %11 = shufflevector <16 x i32> %10, <16 x i32> undef, <4 x i32> + %12 = tail call spir_func <4 x i32> @_Z31sub_group_non_uniform_broadcastDv4_ij(<4 x i32> %11, i32 0) + %13 = shufflevector <4 x i32> %12, <4 x i32> undef, <16 x i32> + %14 = shufflevector <16 x i32> %13, <16 x i32> %10, <16 x i32> + %15 = shufflevector <16 x i32> %14, <16 x i32> undef, <8 x i32> + %16 = tail call spir_func <8 x i32> @_Z31sub_group_non_uniform_broadcastDv8_ij(<8 x i32> %15, i32 0) + %17 = shufflevector <8 x i32> %16, <8 x i32> undef, <16 x i32> + %18 = shufflevector <16 x i32> %17, <16 x i32> %14, <16 x i32> + %19 = tail call spir_func <16 x i32> @_Z31sub_group_non_uniform_broadcastDv16_ij(<16 x i32> %18, i32 0) + %20 = extractelement <16 x i32> %19, i64 0 + %21 = tail call spir_func i32 @_Z25sub_group_broadcast_firsti(i32 %20) + ret void +} + +declare dso_local spir_func i32 @_Z31sub_group_non_uniform_broadcastij(i32, i32) local_unnamed_addr + +declare dso_local spir_func <2 x i32> @_Z31sub_group_non_uniform_broadcastDv2_ij(<2 x i32>, i32) local_unnamed_addr + +declare dso_local spir_func <3 x i32> @_Z31sub_group_non_uniform_broadcastDv3_ij(<3 x i32>, i32) local_unnamed_addr + +declare dso_local spir_func <4 x i32> @_Z31sub_group_non_uniform_broadcastDv4_ij(<4 x i32>, i32) local_unnamed_addr + +declare dso_local spir_func <8 x i32> @_Z31sub_group_non_uniform_broadcastDv8_ij(<8 x i32>, i32) local_unnamed_addr + +declare dso_local spir_func <16 x i32> @_Z31sub_group_non_uniform_broadcastDv16_ij(<16 x i32>, i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z25sub_group_broadcast_firsti(i32) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#int]] %[[#ScopeSubgroup]] %[[#int_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#int2_0:]] = OpVectorShuffle %[[#int2]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#int2]] %[[#ScopeSubgroup]] %[[#int2_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#int3_0:]] = OpVectorShuffle %[[#int3]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#int3]] %[[#ScopeSubgroup]] %[[#int3_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#int4_0:]] = OpVectorShuffle %[[#int4]] +; 
CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#int4]] %[[#ScopeSubgroup]] %[[#int4_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#int8_0:]] = OpVectorShuffle %[[#int8]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#int8]] %[[#ScopeSubgroup]] %[[#int8_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#int16]] +; CHECK-SPIRV: %[[#int16_0:]] = OpVectorShuffle %[[#int16]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#int16]] %[[#ScopeSubgroup]] %[[#int16_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#int_value:]] = OpCompositeExtract %[[#int]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcastFirst %[[#int]] %[[#ScopeSubgroup]] %[[#int_value]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testNonUniformBroadcastUInts() local_unnamed_addr { + %1 = tail call spir_func i32 @_Z31sub_group_non_uniform_broadcastjj(i32 0, i32 0) + %2 = insertelement <16 x i32> , i32 %1, i64 0 + %3 = shufflevector <16 x i32> %2, <16 x i32> undef, <2 x i32> + %4 = tail call spir_func <2 x i32> @_Z31sub_group_non_uniform_broadcastDv2_jj(<2 x i32> %3, i32 0) + %5 = shufflevector <2 x i32> %4, <2 x i32> undef, <16 x i32> + %6 = shufflevector <16 x i32> %5, <16 x i32> %2, <16 x i32> + %7 = shufflevector <16 x i32> %6, <16 x i32> undef, <3 x i32> + %8 = tail call spir_func <3 x i32> @_Z31sub_group_non_uniform_broadcastDv3_jj(<3 x i32> %7, i32 0) + %9 = shufflevector <3 x i32> %8, <3 x i32> undef, <16 x i32> + %10 = shufflevector <16 x i32> %9, <16 x i32> %6, <16 x i32> + %11 = shufflevector <16 x i32> %10, <16 x i32> undef, <4 x i32> + %12 = tail call spir_func <4 x i32> @_Z31sub_group_non_uniform_broadcastDv4_jj(<4 x i32> %11, i32 0) + %13 = shufflevector <4 x i32> %12, <4 x i32> undef, <16 x i32> + %14 = shufflevector <16 x i32> %13, <16 x i32> %10, <16 x i32> + %15 = shufflevector <16 x i32> %14, <16 x i32> undef, <8 x i32> + %16 = tail call spir_func <8 x i32> @_Z31sub_group_non_uniform_broadcastDv8_jj(<8 x i32> %15, i32 0) + %17 = shufflevector <8 x i32> %16, <8 x i32> undef, <16 x i32> + %18 = shufflevector <16 x i32> %17, <16 x i32> %14, <16 x i32> + %19 = tail call spir_func <16 x i32> @_Z31sub_group_non_uniform_broadcastDv16_jj(<16 x i32> %18, i32 0) + %20 = extractelement <16 x i32> %19, i64 0 + %21 = tail call spir_func i32 @_Z25sub_group_broadcast_firstj(i32 %20) + ret void +} + +declare dso_local spir_func i32 @_Z31sub_group_non_uniform_broadcastjj(i32, i32) local_unnamed_addr + +declare dso_local spir_func <2 x i32> @_Z31sub_group_non_uniform_broadcastDv2_jj(<2 x i32>, i32) local_unnamed_addr + +declare dso_local spir_func <3 x i32> @_Z31sub_group_non_uniform_broadcastDv3_jj(<3 x i32>, i32) local_unnamed_addr + +declare dso_local spir_func <4 x i32> @_Z31sub_group_non_uniform_broadcastDv4_jj(<4 x i32>, i32) local_unnamed_addr + +declare dso_local spir_func <8 x i32> @_Z31sub_group_non_uniform_broadcastDv8_jj(<8 x i32>, i32) local_unnamed_addr + +declare dso_local spir_func <16 x i32> @_Z31sub_group_non_uniform_broadcastDv16_jj(<16 x i32>, i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z25sub_group_broadcast_firstj(i32) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#long]] %[[#ScopeSubgroup]] %[[#long_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#long2_0:]] = OpVectorShuffle %[[#long2]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#long2]] %[[#ScopeSubgroup]] %[[#long2_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#long3_0:]] = OpVectorShuffle %[[#long3]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast 
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#long3]] %[[#ScopeSubgroup]] %[[#long3_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#long4_0:]] = OpVectorShuffle %[[#long4]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#long4]] %[[#ScopeSubgroup]] %[[#long4_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#long8_0:]] = OpVectorShuffle %[[#long8]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#long8]] %[[#ScopeSubgroup]] %[[#long8_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#long16]]
+; CHECK-SPIRV: %[[#long16_0:]] = OpVectorShuffle %[[#long16]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#long16]] %[[#ScopeSubgroup]] %[[#long16_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#long_value:]] = OpCompositeExtract %[[#long]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcastFirst %[[#long]] %[[#ScopeSubgroup]] %[[#long_value]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testNonUniformBroadcastLongs() local_unnamed_addr {
+ %1 = tail call spir_func i64 @_Z31sub_group_non_uniform_broadcastlj(i64 0, i32 0)
+ %2 = insertelement <16 x i64> , i64 %1, i64 0
+ %3 = shufflevector <16 x i64> %2, <16 x i64> undef, <2 x i32>
+ %4 = tail call spir_func <2 x i64> @_Z31sub_group_non_uniform_broadcastDv2_lj(<2 x i64> %3, i32 0)
+ %5 = shufflevector <2 x i64> %4, <2 x i64> undef, <16 x i32>
+ %6 = shufflevector <16 x i64> %5, <16 x i64> %2, <16 x i32>
+ %7 = shufflevector <16 x i64> %6, <16 x i64> undef, <3 x i32>
+ %8 = tail call spir_func <3 x i64> @_Z31sub_group_non_uniform_broadcastDv3_lj(<3 x i64> %7, i32 0)
+ %9 = shufflevector <3 x i64> %8, <3 x i64> undef, <16 x i32>
+ %10 = shufflevector <16 x i64> %9, <16 x i64> %6, <16 x i32>
+ %11 = shufflevector <16 x i64> %10, <16 x i64> undef, <4 x i32>
+ %12 = tail call spir_func <4 x i64> @_Z31sub_group_non_uniform_broadcastDv4_lj(<4 x i64> %11, i32 0)
+ %13 = shufflevector <4 x i64> %12, <4 x i64> undef, <16 x i32>
+ %14 = shufflevector <16 x i64> %13, <16 x i64> %10, <16 x i32>
+ %15 = shufflevector <16 x i64> %14, <16 x i64> undef, <8 x i32>
+ %16 = tail call spir_func <8 x i64> @_Z31sub_group_non_uniform_broadcastDv8_lj(<8 x i64> %15, i32 0)
+ %17 = shufflevector <8 x i64> %16, <8 x i64> undef, <16 x i32>
+ %18 = shufflevector <16 x i64> %17, <16 x i64> %14, <16 x i32>
+ %19 = tail call spir_func <16 x i64> @_Z31sub_group_non_uniform_broadcastDv16_lj(<16 x i64> %18, i32 0)
+ %20 = extractelement <16 x i64> %19, i64 0
+ %21 = tail call spir_func i64 @_Z25sub_group_broadcast_firstl(i64 %20)
+ ret void
+}
+
+declare dso_local spir_func i64 @_Z31sub_group_non_uniform_broadcastlj(i64, i32) local_unnamed_addr
+
+declare dso_local spir_func <2 x i64> @_Z31sub_group_non_uniform_broadcastDv2_lj(<2 x i64>, i32) local_unnamed_addr
+
+declare dso_local spir_func <3 x i64> @_Z31sub_group_non_uniform_broadcastDv3_lj(<3 x i64>, i32) local_unnamed_addr
+
+declare dso_local spir_func <4 x i64> @_Z31sub_group_non_uniform_broadcastDv4_lj(<4 x i64>, i32) local_unnamed_addr
+
+declare dso_local spir_func <8 x i64> @_Z31sub_group_non_uniform_broadcastDv8_lj(<8 x i64>, i32) local_unnamed_addr
+
+declare dso_local spir_func <16 x i64> @_Z31sub_group_non_uniform_broadcastDv16_lj(<16 x i64>, i32) local_unnamed_addr
+
+declare dso_local spir_func i64 @_Z25sub_group_broadcast_firstl(i64) local_unnamed_addr
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#long]] %[[#ScopeSubgroup]] %[[#long_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#long2_0:]] = OpVectorShuffle %[[#long2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#long2]] %[[#ScopeSubgroup]] %[[#long2_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#long3_0:]] = OpVectorShuffle %[[#long3]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#long3]] %[[#ScopeSubgroup]] %[[#long3_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#long4_0:]] = OpVectorShuffle %[[#long4]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#long4]] %[[#ScopeSubgroup]] %[[#long4_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#long8_0:]] = OpVectorShuffle %[[#long8]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#long8]] %[[#ScopeSubgroup]] %[[#long8_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#long16]]
+; CHECK-SPIRV: %[[#long16_0:]] = OpVectorShuffle %[[#long16]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#long16]] %[[#ScopeSubgroup]] %[[#long16_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#long_value:]] = OpCompositeExtract %[[#long]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcastFirst %[[#long]] %[[#ScopeSubgroup]] %[[#long_value]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testNonUniformBroadcastULongs() local_unnamed_addr {
+ %1 = tail call spir_func i64 @_Z31sub_group_non_uniform_broadcastmj(i64 0, i32 0)
+ %2 = insertelement <16 x i64> , i64 %1, i64 0
+ %3 = shufflevector <16 x i64> %2, <16 x i64> undef, <2 x i32>
+ %4 = tail call spir_func <2 x i64> @_Z31sub_group_non_uniform_broadcastDv2_mj(<2 x i64> %3, i32 0)
+ %5 = shufflevector <2 x i64> %4, <2 x i64> undef, <16 x i32>
+ %6 = shufflevector <16 x i64> %5, <16 x i64> %2, <16 x i32>
+ %7 = shufflevector <16 x i64> %6, <16 x i64> undef, <3 x i32>
+ %8 = tail call spir_func <3 x i64> @_Z31sub_group_non_uniform_broadcastDv3_mj(<3 x i64> %7, i32 0)
+ %9 = shufflevector <3 x i64> %8, <3 x i64> undef, <16 x i32>
+ %10 = shufflevector <16 x i64> %9, <16 x i64> %6, <16 x i32>
+ %11 = shufflevector <16 x i64> %10, <16 x i64> undef, <4 x i32>
+ %12 = tail call spir_func <4 x i64> @_Z31sub_group_non_uniform_broadcastDv4_mj(<4 x i64> %11, i32 0)
+ %13 = shufflevector <4 x i64> %12, <4 x i64> undef, <16 x i32>
+ %14 = shufflevector <16 x i64> %13, <16 x i64> %10, <16 x i32>
+ %15 = shufflevector <16 x i64> %14, <16 x i64> undef, <8 x i32>
+ %16 = tail call spir_func <8 x i64> @_Z31sub_group_non_uniform_broadcastDv8_mj(<8 x i64> %15, i32 0)
+ %17 = shufflevector <8 x i64> %16, <8 x i64> undef, <16 x i32>
+ %18 = shufflevector <16 x i64> %17, <16 x i64> %14, <16 x i32>
+ %19 = tail call spir_func <16 x i64> @_Z31sub_group_non_uniform_broadcastDv16_mj(<16 x i64> %18, i32 0)
+ %20 = extractelement <16 x i64> %19, i64 0
+ %21 = tail call spir_func i64 @_Z25sub_group_broadcast_firstm(i64 %20)
+ ret void
+}
+
+declare dso_local spir_func i64 @_Z31sub_group_non_uniform_broadcastmj(i64, i32) local_unnamed_addr
+
+declare dso_local spir_func <2 x i64> @_Z31sub_group_non_uniform_broadcastDv2_mj(<2 x i64>, i32) local_unnamed_addr
+
+declare dso_local spir_func <3 x i64> @_Z31sub_group_non_uniform_broadcastDv3_mj(<3 x i64>, i32) local_unnamed_addr
+
+declare dso_local spir_func <4 x i64> @_Z31sub_group_non_uniform_broadcastDv4_mj(<4 x i64>, i32) local_unnamed_addr
+
+declare dso_local spir_func <8 x i64> @_Z31sub_group_non_uniform_broadcastDv8_mj(<8 x i64>, i32) local_unnamed_addr
+
+declare dso_local spir_func <16 x i64> @_Z31sub_group_non_uniform_broadcastDv16_mj(<16 x i64>, i32) local_unnamed_addr
+
+declare dso_local spir_func i64 @_Z25sub_group_broadcast_firstm(i64) local_unnamed_addr
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#float]] %[[#ScopeSubgroup]] %[[#float_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#float2_0:]] = OpVectorShuffle %[[#float2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#float2]] %[[#ScopeSubgroup]] %[[#float2_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#float3_0:]] = OpVectorShuffle %[[#float3]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#float3]] %[[#ScopeSubgroup]] %[[#float3_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#float4_0:]] = OpVectorShuffle %[[#float4]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#float4]] %[[#ScopeSubgroup]] %[[#float4_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#float8_0:]] = OpVectorShuffle %[[#float8]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#float8]] %[[#ScopeSubgroup]] %[[#float8_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#float16]]
+; CHECK-SPIRV: %[[#float16_0:]] = OpVectorShuffle %[[#float16]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#float16]] %[[#ScopeSubgroup]] %[[#float16_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#float_value:]] = OpCompositeExtract %[[#float]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcastFirst %[[#float]] %[[#ScopeSubgroup]] %[[#float_value]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testNonUniformBroadcastFloats() local_unnamed_addr {
+ %1 = tail call spir_func float @_Z31sub_group_non_uniform_broadcastfj(float 0.000000e+00, i32 0)
+ %2 = insertelement <16 x float> , float %1, i64 0
+ %3 = shufflevector <16 x float> %2, <16 x float> undef, <2 x i32>
+ %4 = tail call spir_func <2 x float> @_Z31sub_group_non_uniform_broadcastDv2_fj(<2 x float> %3, i32 0)
+ %5 = shufflevector <2 x float> %4, <2 x float> undef, <16 x i32>
+ %6 = shufflevector <16 x float> %5, <16 x float> %2, <16 x i32>
+ %7 = shufflevector <16 x float> %6, <16 x float> undef, <3 x i32>
+ %8 = tail call spir_func <3 x float> @_Z31sub_group_non_uniform_broadcastDv3_fj(<3 x float> %7, i32 0)
+ %9 = shufflevector <3 x float> %8, <3 x float> undef, <16 x i32>
+ %10 = shufflevector <16 x float> %9, <16 x float> %6, <16 x i32>
+ %11 = shufflevector <16 x float> %10, <16 x float> undef, <4 x i32>
+ %12 = tail call spir_func <4 x float> @_Z31sub_group_non_uniform_broadcastDv4_fj(<4 x float> %11, i32 0)
+ %13 = shufflevector <4 x float> %12, <4 x float> undef, <16 x i32>
+ %14 = shufflevector <16 x float> %13, <16 x float> %10, <16 x i32>
+ %15 = shufflevector <16 x float> %14, <16 x float> undef, <8 x i32>
+ %16 = tail call spir_func <8 x float> @_Z31sub_group_non_uniform_broadcastDv8_fj(<8 x float> %15, i32 0)
+ %17 = shufflevector <8 x float> %16, <8 x float> undef, <16 x i32>
+ %18 = shufflevector <16 x float> %17, <16 x float> %14, <16 x i32>
+ %19 = tail call spir_func <16 x float> @_Z31sub_group_non_uniform_broadcastDv16_fj(<16 x float> %18, i32 0)
+ %20 = extractelement <16 x float> %19, i64 0
+ %21 = tail call spir_func float @_Z25sub_group_broadcast_firstf(float %20)
+ ret void
+}
+
+declare dso_local spir_func float @_Z31sub_group_non_uniform_broadcastfj(float, i32) local_unnamed_addr
+
+declare dso_local spir_func <2 x float> @_Z31sub_group_non_uniform_broadcastDv2_fj(<2 x float>, i32) local_unnamed_addr
+
+declare dso_local spir_func <3 x float> @_Z31sub_group_non_uniform_broadcastDv3_fj(<3 x float>, i32) local_unnamed_addr
+
+declare dso_local spir_func <4 x float> @_Z31sub_group_non_uniform_broadcastDv4_fj(<4 x float>, i32) local_unnamed_addr
+
+declare dso_local spir_func <8 x float> @_Z31sub_group_non_uniform_broadcastDv8_fj(<8 x float>, i32) local_unnamed_addr
+
+declare dso_local spir_func <16 x float> @_Z31sub_group_non_uniform_broadcastDv16_fj(<16 x float>, i32) local_unnamed_addr
+
+declare dso_local spir_func float @_Z25sub_group_broadcast_firstf(float) local_unnamed_addr
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#half]] %[[#ScopeSubgroup]] %[[#half_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#half2_0:]] = OpVectorShuffle %[[#half2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#half2]] %[[#ScopeSubgroup]] %[[#half2_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#half3_0:]] = OpVectorShuffle %[[#half3]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#half3]] %[[#ScopeSubgroup]] %[[#half3_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#half4_0:]] = OpVectorShuffle %[[#half4]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#half4]] %[[#ScopeSubgroup]] %[[#half4_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#half8_0:]] = OpVectorShuffle %[[#half8]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#half8]] %[[#ScopeSubgroup]] %[[#half8_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#half16]]
+; CHECK-SPIRV: %[[#half16_0:]] = OpVectorShuffle %[[#half16]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#half16]] %[[#ScopeSubgroup]] %[[#half16_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#half_value:]] = OpCompositeExtract %[[#half]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcastFirst %[[#half]] %[[#ScopeSubgroup]] %[[#half_value]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testNonUniformBroadcastHalfs() local_unnamed_addr {
+ %1 = tail call spir_func half @_Z31sub_group_non_uniform_broadcastDhj(half 0xH0000, i32 0)
+ %2 = insertelement <16 x half> , half %1, i64 0
+ %3 = shufflevector <16 x half> %2, <16 x half> undef, <2 x i32>
+ %4 = tail call spir_func <2 x half> @_Z31sub_group_non_uniform_broadcastDv2_Dhj(<2 x half> %3, i32 0)
+ %5 = shufflevector <2 x half> %4, <2 x half> undef, <16 x i32>
+ %6 = shufflevector <16 x half> %5, <16 x half> %2, <16 x i32>
+ %7 = shufflevector <16 x half> %6, <16 x half> undef, <3 x i32>
+ %8 = tail call spir_func <3 x half> @_Z31sub_group_non_uniform_broadcastDv3_Dhj(<3 x half> %7, i32 0)
+ %9 = shufflevector <3 x half> %8, <3 x half> undef, <16 x i32>
+ %10 = shufflevector <16 x half> %9, <16 x half> %6, <16 x i32>
+ %11 = shufflevector <16 x half> %10, <16 x half> undef, <4 x i32>
+ %12 = tail call spir_func <4 x half> @_Z31sub_group_non_uniform_broadcastDv4_Dhj(<4 x half> %11, i32 0)
+ %13 = shufflevector <4 x half> %12, <4 x half> undef, <16 x i32>
+ %14 = shufflevector <16 x half> %13, <16 x half> %10, <16 x i32>
+ %15 = shufflevector <16 x half> %14, <16 x half> undef, <8 x i32>
+ %16 = tail call spir_func <8 x half> @_Z31sub_group_non_uniform_broadcastDv8_Dhj(<8 x half> %15, i32 0)
+ %17 = shufflevector <8 x half> %16, <8 x half> undef, <16 x i32>
+ %18 = shufflevector <16 x half> %17, <16 x half> %14, <16 x i32>
+ %19 = tail call spir_func <16 x half> @_Z31sub_group_non_uniform_broadcastDv16_Dhj(<16 x half> %18, i32 0)
+ %20 = extractelement <16 x half> %19, i64 0
+ %21 = tail call spir_func half @_Z25sub_group_broadcast_firstDh(half %20)
+ ret void
+}
+
+declare dso_local spir_func half @_Z31sub_group_non_uniform_broadcastDhj(half, i32) local_unnamed_addr
+
+declare dso_local spir_func <2 x half> @_Z31sub_group_non_uniform_broadcastDv2_Dhj(<2 x half>, i32) local_unnamed_addr
+
+declare dso_local spir_func <3 x half> @_Z31sub_group_non_uniform_broadcastDv3_Dhj(<3 x half>, i32) local_unnamed_addr
+
+declare dso_local spir_func <4 x half> @_Z31sub_group_non_uniform_broadcastDv4_Dhj(<4 x half>, i32) local_unnamed_addr
+
+declare dso_local spir_func <8 x half> @_Z31sub_group_non_uniform_broadcastDv8_Dhj(<8 x half>, i32) local_unnamed_addr
+
+declare dso_local spir_func <16 x half> @_Z31sub_group_non_uniform_broadcastDv16_Dhj(<16 x half>, i32) local_unnamed_addr
+
+declare dso_local spir_func half @_Z25sub_group_broadcast_firstDh(half) local_unnamed_addr
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#double]] %[[#ScopeSubgroup]] %[[#double_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#double2_0:]] = OpVectorShuffle %[[#double2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#double2]] %[[#ScopeSubgroup]] %[[#double2_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#double3_0:]] = OpVectorShuffle %[[#double3]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#double3]] %[[#ScopeSubgroup]] %[[#double3_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#double4_0:]] = OpVectorShuffle %[[#double4]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#double4]] %[[#ScopeSubgroup]] %[[#double4_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#double8_0:]] = OpVectorShuffle %[[#double8]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#double8]] %[[#ScopeSubgroup]] %[[#double8_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#double16]]
+; CHECK-SPIRV: %[[#double16_0:]] = OpVectorShuffle %[[#double16]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#double16]] %[[#ScopeSubgroup]] %[[#double16_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#double_value:]] = OpCompositeExtract %[[#double]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcastFirst %[[#double]] %[[#ScopeSubgroup]] %[[#double_value]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testNonUniformBroadcastDoubles() local_unnamed_addr {
+ %1 = tail call spir_func double @_Z31sub_group_non_uniform_broadcastdj(double 0.000000e+00, i32 0)
+ %2 = insertelement <16 x double> , double %1, i64 0
+ %3 = shufflevector <16 x double> %2, <16 x double> undef, <2 x i32>
+ %4 = tail call spir_func <2 x double> @_Z31sub_group_non_uniform_broadcastDv2_dj(<2 x double> %3, i32 0)
+ %5 = shufflevector <2 x double> %4, <2 x double> undef, <16 x i32>
+ %6 = shufflevector <16 x double> %5, <16 x double> %2, <16 x i32>
+ %7 = shufflevector <16 x double> %6, <16 x double> undef, <3 x i32>
+ %8 = tail call spir_func <3 x double> @_Z31sub_group_non_uniform_broadcastDv3_dj(<3 x double> %7, i32 0)
+ %9 = shufflevector <3 x double> %8, <3 x double> undef, <16 x i32>
+ %10 = shufflevector <16 x double> %9, <16 x double> %6, <16 x i32>
+ %11 = shufflevector <16 x double> %10, <16 x double> undef, <4 x i32>
+ %12 = tail call spir_func <4 x double> @_Z31sub_group_non_uniform_broadcastDv4_dj(<4 x double> %11, i32 0)
+ %13 = shufflevector <4 x double> %12, <4 x double> undef, <16 x i32>
+ %14 = shufflevector <16 x double> %13, <16 x double> %10, <16 x i32>
+ %15 = shufflevector <16 x double> %14, <16 x double> undef, <8 x i32>
+ %16 = tail call spir_func <8 x double> @_Z31sub_group_non_uniform_broadcastDv8_dj(<8 x double> %15, i32 0)
+ %17 = shufflevector <8 x double> %16, <8 x double> undef, <16 x i32>
+ %18 = shufflevector <16 x double> %17, <16 x double> %14, <16 x i32>
+ %19 = tail call spir_func <16 x double> @_Z31sub_group_non_uniform_broadcastDv16_dj(<16 x double> %18, i32 0)
+ %20 = extractelement <16 x double> %19, i64 0
+ %21 = tail call spir_func double @_Z25sub_group_broadcast_firstd(double %20)
+ ret void
+}
+
+declare dso_local spir_func double @_Z31sub_group_non_uniform_broadcastdj(double, i32) local_unnamed_addr
+
+declare dso_local spir_func <2 x double> @_Z31sub_group_non_uniform_broadcastDv2_dj(<2 x double>, i32) local_unnamed_addr
+
+declare dso_local spir_func <3 x double> @_Z31sub_group_non_uniform_broadcastDv3_dj(<3 x double>, i32) local_unnamed_addr
+
+declare dso_local spir_func <4 x double> @_Z31sub_group_non_uniform_broadcastDv4_dj(<4 x double>, i32) local_unnamed_addr
+
+declare dso_local spir_func <8 x double> @_Z31sub_group_non_uniform_broadcastDv8_dj(<8 x double>, i32) local_unnamed_addr
+
+declare dso_local spir_func <16 x double> @_Z31sub_group_non_uniform_broadcastDv16_dj(<16 x double>, i32) local_unnamed_addr
+
+declare dso_local spir_func double @_Z25sub_group_broadcast_firstd(double) local_unnamed_addr
+
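+;; All of the ballot query builtins below operate on the uint4 mask returned
+;; by sub_group_ballot, so the checks bind the single OpGroupNonUniformBallot
+;; result and expect it as the operand of every InverseBallot, BitExtract,
+;; BitCount, FindLSB and FindMSB instruction.
+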
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#ballot:]] = OpGroupNonUniformBallot %[[#int4]] %[[#ScopeSubgroup]] %[[#false]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformInverseBallot %[[#bool]] %[[#ScopeSubgroup]] %[[#ballot]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBallotBitExtract %[[#bool]] %[[#ScopeSubgroup]] %[[#ballot]] %[[#int_0]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBallotBitCount %[[#int]] %[[#ScopeSubgroup]] Reduce %[[#ballot]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBallotBitCount %[[#int]] %[[#ScopeSubgroup]] InclusiveScan %[[#ballot]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBallotBitCount %[[#int]] %[[#ScopeSubgroup]] ExclusiveScan %[[#ballot]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBallotFindLSB %[[#int]] %[[#ScopeSubgroup]] %[[#ballot]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBallotFindMSB %[[#int]] %[[#ScopeSubgroup]] %[[#ballot]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testBallotOperations(i32 addrspace(1)* nocapture) local_unnamed_addr {
+ %2 = tail call spir_func <4 x i32> @_Z16sub_group_balloti(i32 0)
+ %3 = tail call spir_func i32 @_Z24sub_group_inverse_ballotDv4_j(<4 x i32> %2)
+ store i32 %3, i32 addrspace(1)* %0, align 4
+ %4 = tail call spir_func i32 @_Z28sub_group_ballot_bit_extractDv4_jj(<4 x i32> %2, i32 0)
+ %5 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 1
+ store i32 %4, i32 addrspace(1)* %5, align 4
+ %6 = tail call spir_func i32 @_Z26sub_group_ballot_bit_countDv4_j(<4 x i32> %2)
+ %7 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 2
+ store i32 %6, i32 addrspace(1)* %7, align 4
+ %8 = tail call spir_func i32 @_Z31sub_group_ballot_inclusive_scanDv4_j(<4 x i32> %2)
+ %9 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 3
+ store i32 %8, i32 addrspace(1)* %9, align 4
+ %10 = tail call spir_func i32 @_Z31sub_group_ballot_exclusive_scanDv4_j(<4 x i32> %2)
+ %11 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 4
+ store i32 %10, i32 addrspace(1)* %11, align 4
+ %12 = tail call spir_func i32 @_Z25sub_group_ballot_find_lsbDv4_j(<4 x i32> %2)
+ %13 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 5
+ store i32 %12, i32 addrspace(1)* %13, align 4
+ %14 = tail call spir_func i32 @_Z25sub_group_ballot_find_msbDv4_j(<4 x i32> %2)
+ %15 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 6
+ store i32 %14, i32 addrspace(1)* %15, align 4
+ ret void
+}
+
+declare dso_local spir_func <4 x i32> @_Z16sub_group_balloti(i32) local_unnamed_addr
+
+declare dso_local spir_func i32 @_Z24sub_group_inverse_ballotDv4_j(<4 x i32>) local_unnamed_addr
+
+declare dso_local spir_func i32 @_Z28sub_group_ballot_bit_extractDv4_jj(<4 x i32>, i32) local_unnamed_addr
+
+declare dso_local spir_func i32 @_Z26sub_group_ballot_bit_countDv4_j(<4 x i32>) local_unnamed_addr
+
+declare dso_local spir_func i32 @_Z31sub_group_ballot_inclusive_scanDv4_j(<4 x i32>) local_unnamed_addr
+
+declare dso_local spir_func i32 @_Z31sub_group_ballot_exclusive_scanDv4_j(<4 x i32>) local_unnamed_addr
+
+declare dso_local spir_func i32 @_Z25sub_group_ballot_find_lsbDv4_j(<4 x i32>) local_unnamed_addr
+
+declare dso_local spir_func i32 @_Z25sub_group_ballot_find_msbDv4_j(<4 x i32>) local_unnamed_addr
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpLoad %[[#int4]] %[[#eqMask]]
+; CHECK-SPIRV: %[[#]] = OpLoad %[[#int4]] %[[#geMask]]
+; CHECK-SPIRV: %[[#]] = OpLoad %[[#int4]] %[[#gtMask]]
+; CHECK-SPIRV: %[[#]] = OpLoad %[[#int4]] %[[#leMask]]
+; CHECK-SPIRV: %[[#]] = OpLoad %[[#int4]] %[[#ltMask]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testSubgroupMasks(<4 x i32> addrspace(1)* nocapture) local_unnamed_addr {
+ %2 = tail call spir_func <4 x i32> @_Z21get_sub_group_eq_maskv()
+ store <4 x i32> %2, <4 x i32> addrspace(1)* %0, align 16
+ %3 = tail call spir_func <4 x i32> @_Z21get_sub_group_ge_maskv()
+ %4 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %0, i64 1
+ store <4 x i32> %3, <4 x i32> addrspace(1)* %4, align 16
+ %5 = tail call spir_func <4 x i32> @_Z21get_sub_group_gt_maskv()
+ %6 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %0, i64 2
+ store <4 x i32> %5, <4 x i32> addrspace(1)* %6, align 16
+ %7 = tail call spir_func <4 x i32> @_Z21get_sub_group_le_maskv()
+ %8 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %0, i64 3
+ store <4 x i32> %7, <4 x i32> addrspace(1)* %8, align 16
+ %9 = tail call spir_func <4 x i32> @_Z21get_sub_group_lt_maskv()
+ %10 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %0, i64 4
+ store <4 x i32> %9, <4 x i32> addrspace(1)* %10, align 16
+ ret void
+}
+
+declare dso_local spir_func <4 x i32> @_Z21get_sub_group_eq_maskv() local_unnamed_addr
+
+declare dso_local spir_func <4 x i32> @_Z21get_sub_group_ge_maskv() local_unnamed_addr
+
+declare dso_local spir_func <4 x i32> @_Z21get_sub_group_gt_maskv() local_unnamed_addr
+
+declare dso_local spir_func <4 x i32> @_Z21get_sub_group_le_maskv() local_unnamed_addr
+
+declare dso_local spir_func <4 x i32> @_Z21get_sub_group_lt_maskv() local_unnamed_addr
diff --git a/llvm/test/CodeGen/SPIRV/transcoding/sub_group_clustered_reduce.ll b/llvm/test/CodeGen/SPIRV/transcoding/sub_group_clustered_reduce.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/transcoding/sub_group_clustered_reduce.ll
@@ -0,0 +1,744 @@
+;; #pragma OPENCL EXTENSION cl_khr_subgroup_clustered_reduce : enable
+;; #pragma OPENCL EXTENSION cl_khr_fp16 : enable
+;; #pragma OPENCL EXTENSION cl_khr_fp64 : enable
+;;
+;; kernel void testClusteredArithmeticChar(global char* dst)
+;; {
+;; char v = 0;
+;; dst[0] = sub_group_clustered_reduce_add(v, 2);
+;; dst[1] = sub_group_clustered_reduce_mul(v, 2);
+;; dst[2] = sub_group_clustered_reduce_min(v, 2);
+;; dst[3] = sub_group_clustered_reduce_max(v, 2);
+;; }
+;;
+;; kernel void testClusteredArithmeticUChar(global uchar* dst)
+;; {
+;; uchar v = 0;
+;; dst[0] = sub_group_clustered_reduce_add(v, 2);
+;; dst[1] = sub_group_clustered_reduce_mul(v, 2);
+;; dst[2] = sub_group_clustered_reduce_min(v, 2);
+;; dst[3] = sub_group_clustered_reduce_max(v, 2);
+;; }
+;;
+;; kernel void testClusteredArithmeticShort(global short* dst)
+;; {
+;; short v = 0;
+;; dst[0] = sub_group_clustered_reduce_add(v, 2);
+;; dst[1] = sub_group_clustered_reduce_mul(v, 2);
+;; dst[2] = sub_group_clustered_reduce_min(v, 2);
+;; dst[3] = sub_group_clustered_reduce_max(v, 2);
+;; }
+;;
+;; kernel void testClusteredArithmeticUShort(global ushort* dst)
+;; {
+;; ushort v = 0;
+;; dst[0] = sub_group_clustered_reduce_add(v, 2);
+;; dst[1] = sub_group_clustered_reduce_mul(v, 2);
+;; dst[2] = sub_group_clustered_reduce_min(v, 2);
+;; dst[3] = sub_group_clustered_reduce_max(v, 2);
+;; }
+;;
+;; kernel void testClusteredArithmeticInt(global int* dst)
+;; {
+;; int v = 0;
+;; dst[0] = sub_group_clustered_reduce_add(v, 2);
+;; dst[1] = sub_group_clustered_reduce_mul(v, 2);
+;; dst[2] = sub_group_clustered_reduce_min(v, 2);
+;; dst[3] = sub_group_clustered_reduce_max(v, 2);
+;; }
+;;
+;; kernel void testClusteredArithmeticUInt(global uint* dst)
+;; {
+;; uint v = 0;
+;; dst[0] = sub_group_clustered_reduce_add(v, 2);
+;; dst[1] = sub_group_clustered_reduce_mul(v, 2);
+;; dst[2] = sub_group_clustered_reduce_min(v, 2);
+;; dst[3] = sub_group_clustered_reduce_max(v, 2);
+;; }
+;;
+;; kernel void testClusteredArithmeticLong(global long* dst)
+;; {
+;; long v = 0;
+;; dst[0] = sub_group_clustered_reduce_add(v, 2);
+;; dst[1] = sub_group_clustered_reduce_mul(v, 2);
+;; dst[2] = sub_group_clustered_reduce_min(v, 2);
+;; dst[3] = sub_group_clustered_reduce_max(v, 2);
+;; }
+;;
+;; kernel void testClusteredArithmeticULong(global ulong* dst)
+;; {
+;; ulong v = 0;
+;; dst[0] = sub_group_clustered_reduce_add(v, 2);
+;; dst[1] = sub_group_clustered_reduce_mul(v, 2);
+;; dst[2] = sub_group_clustered_reduce_min(v, 2);
+;; dst[3] = sub_group_clustered_reduce_max(v, 2);
+;; }
+;;
+;; kernel void testClusteredArithmeticFloat(global float* dst)
+;; {
+;; float v = 0;
+;; dst[0] = sub_group_clustered_reduce_add(v, 2);
+;; dst[1] = sub_group_clustered_reduce_mul(v, 2);
+;; dst[2] = sub_group_clustered_reduce_min(v, 2);
+;; dst[3] = sub_group_clustered_reduce_max(v, 2);
+;; }
+;;
+;; kernel void testClusteredArithmeticHalf(global half* dst)
+;; {
+;; half v = 0;
+;; dst[0] = sub_group_clustered_reduce_add(v, 2);
+;; dst[1] = sub_group_clustered_reduce_mul(v, 2);
+;; dst[2] = sub_group_clustered_reduce_min(v, 2);
+;; dst[3] = sub_group_clustered_reduce_max(v, 2);
+;; }
+;;
+;; kernel void testClusteredArithmeticDouble(global double* dst)
+;; {
+;; double v = 0;
+;; dst[0] = sub_group_clustered_reduce_add(v, 2);
+;; dst[1] = sub_group_clustered_reduce_mul(v, 2);
+;; dst[2] = sub_group_clustered_reduce_min(v, 2);
+;; dst[3] = sub_group_clustered_reduce_max(v, 2);
+;; }
+;;
+;; kernel void testClusteredBitwiseChar(global char* dst)
+;; {
+;; char v = 0;
+;; dst[0] = sub_group_clustered_reduce_and(v, 2);
+;; dst[1] = sub_group_clustered_reduce_or(v, 2);
+;; dst[2] = sub_group_clustered_reduce_xor(v, 2);
+;; }
+;;
+;; kernel void testClusteredBitwiseUChar(global uchar* dst)
+;; {
+;; uchar v = 0;
+;; dst[0] = sub_group_clustered_reduce_and(v, 2);
+;; dst[1] = sub_group_clustered_reduce_or(v, 2);
+;; dst[2] = sub_group_clustered_reduce_xor(v, 2);
+;; }
+;;
+;; kernel void testClusteredBitwiseShort(global short* dst)
+;; {
+;; short v = 0;
+;; dst[0] = sub_group_clustered_reduce_and(v, 2);
+;; dst[1] = sub_group_clustered_reduce_or(v, 2);
+;; dst[2] = sub_group_clustered_reduce_xor(v, 2);
+;; }
+;;
+;; kernel void testClusteredBitwiseUShort(global ushort* dst)
+;; {
+;; ushort v = 0;
+;; dst[0] = sub_group_clustered_reduce_and(v, 2);
+;; dst[1] = sub_group_clustered_reduce_or(v, 2);
+;; dst[2] = sub_group_clustered_reduce_xor(v, 2);
+;; }
+;;
+;; kernel void testClusteredBitwiseInt(global int* dst)
+;; {
+;; int v = 0;
+;; dst[0] = sub_group_clustered_reduce_and(v, 2);
+;; dst[1] = sub_group_clustered_reduce_or(v, 2);
+;; dst[2] = sub_group_clustered_reduce_xor(v, 2);
+;; }
+;;
+;; kernel void testClusteredBitwiseUInt(global uint* dst)
+;; {
+;; uint v = 0;
+;; dst[0] = sub_group_clustered_reduce_and(v, 2);
+;; dst[1] = sub_group_clustered_reduce_or(v, 2);
+;; dst[2] = sub_group_clustered_reduce_xor(v, 2);
+;; }
+;;
+;; kernel void testClusteredBitwiseLong(global long* dst)
+;; {
+;; long v = 0;
+;; dst[0] = sub_group_clustered_reduce_and(v, 2);
+;; dst[1] = sub_group_clustered_reduce_or(v, 2);
+;; dst[2] = sub_group_clustered_reduce_xor(v, 2);
+;; }
+;;
+;; kernel void testClusteredBitwiseULong(global ulong* dst)
+;; {
+;; ulong v = 0;
+;; dst[0] = sub_group_clustered_reduce_and(v, 2);
+;; dst[1] = sub_group_clustered_reduce_or(v, 2);
+;; dst[2] = sub_group_clustered_reduce_xor(v, 2);
+;; }
+;;
+;; kernel void testClusteredLogical(global int* dst)
+;; {
+;; int v = 0;
+;; dst[0] = sub_group_clustered_reduce_logical_and(v, 2);
+;; dst[1] = sub_group_clustered_reduce_logical_or(v, 2);
+;; dst[2] = sub_group_clustered_reduce_logical_xor(v, 2);
+;; }
+
+; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
+
+; CHECK-SPIRV-DAG: OpCapability GroupNonUniformClustered
+
+; CHECK-SPIRV-DAG: %[[#bool:]] = OpTypeBool
+; CHECK-SPIRV-DAG: %[[#char:]] = OpTypeInt 8 0
+; CHECK-SPIRV-DAG: %[[#short:]] = OpTypeInt 16 0
+; CHECK-SPIRV-DAG: %[[#int:]] = OpTypeInt 32 0
+; CHECK-SPIRV-DAG: %[[#long:]] = OpTypeInt 64 0
+; CHECK-SPIRV-DAG: %[[#half:]] = OpTypeFloat 16
+; CHECK-SPIRV-DAG: %[[#float:]] = OpTypeFloat 32
+; CHECK-SPIRV-DAG: %[[#double:]] = OpTypeFloat 64
+
+; CHECK-SPIRV-DAG: %[[#false:]] = OpConstantFalse %[[#bool]]
+; CHECK-SPIRV-DAG: %[[#ScopeSubgroup:]] = OpConstant %[[#int]] 3
+; CHECK-SPIRV-DAG: %[[#char_0:]] = OpConstant %[[#char]] 0
+; CHECK-SPIRV-DAG: %[[#short_0:]] = OpConstant %[[#short]] 0
+; CHECK-SPIRV-DAG: %[[#int_0:]] = OpConstant %[[#int]] 0
+; CHECK-SPIRV-DAG: %[[#int_2:]] = OpConstant %[[#int]] 2
+; CHECK-SPIRV-DAG: %[[#long_0:]] = OpConstantNull %[[#long]]
+; CHECK-SPIRV-DAG: %[[#half_0:]] = OpConstant %[[#half]] 0
+; CHECK-SPIRV-DAG: %[[#float_0:]] = OpConstant %[[#float]] 0
+; CHECK-SPIRV-DAG: %[[#double_0:]] = OpConstant %[[#double]] 0
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMin %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMax %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testClusteredArithmeticChar(i8 addrspace(1)* nocapture) local_unnamed_addr {
+ %2 = tail call spir_func signext i8 @_Z30sub_group_clustered_reduce_addcj(i8 signext 0, i32 2)
+ store i8 %2, i8 addrspace(1)* %0, align 1
+ %3 = tail call spir_func signext i8 @_Z30sub_group_clustered_reduce_mulcj(i8 signext 0, i32 2)
+ %4 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 1
+ store i8 %3, i8 addrspace(1)* %4, align 1
+ %5 = tail call spir_func signext i8 @_Z30sub_group_clustered_reduce_mincj(i8 signext 0, i32 2)
+ %6 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 2
+ store i8 %5, i8 addrspace(1)* %6, align 1
+ %7 = tail call spir_func signext i8 @_Z30sub_group_clustered_reduce_maxcj(i8 signext 0, i32 2)
+ %8 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 3
+ store i8 %7, i8 addrspace(1)* %8, align 1
+ ret void
+}
+
+declare dso_local spir_func signext i8 @_Z30sub_group_clustered_reduce_addcj(i8 signext, i32) local_unnamed_addr
+
+declare dso_local spir_func signext i8 @_Z30sub_group_clustered_reduce_mulcj(i8 signext, i32) local_unnamed_addr
+
+declare dso_local spir_func signext i8 @_Z30sub_group_clustered_reduce_mincj(i8 signext, i32) local_unnamed_addr
+
+declare dso_local spir_func signext i8 @_Z30sub_group_clustered_reduce_maxcj(i8 signext, i32) local_unnamed_addr
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMin %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMax %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testClusteredArithmeticUChar(i8 addrspace(1)* nocapture) local_unnamed_addr {
+ %2 = tail call spir_func zeroext i8 @_Z30sub_group_clustered_reduce_addhj(i8 zeroext 0, i32 2)
+ store i8 %2, i8 addrspace(1)* %0, align 1
+ %3 = tail call spir_func zeroext i8 @_Z30sub_group_clustered_reduce_mulhj(i8 zeroext 0, i32 2)
+ %4 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 1
+ store i8 %3, i8 addrspace(1)* %4, align 1
+ %5 = tail call spir_func zeroext i8 @_Z30sub_group_clustered_reduce_minhj(i8 zeroext 0, i32 2)
+ %6 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 2
+ store i8 %5, i8 addrspace(1)* %6, align 1
+ %7 = tail call spir_func zeroext i8 @_Z30sub_group_clustered_reduce_maxhj(i8 zeroext 0, i32 2)
+ %8 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 3
+ store i8 %7, i8 addrspace(1)* %8, align 1
+ ret void
+}
+
+declare dso_local spir_func zeroext i8 @_Z30sub_group_clustered_reduce_addhj(i8 zeroext, i32) local_unnamed_addr
+
+declare dso_local spir_func zeroext i8 @_Z30sub_group_clustered_reduce_mulhj(i8 zeroext, i32) local_unnamed_addr
+
+declare dso_local spir_func zeroext i8 @_Z30sub_group_clustered_reduce_minhj(i8 zeroext, i32) local_unnamed_addr
+
+declare dso_local spir_func zeroext i8 @_Z30sub_group_clustered_reduce_maxhj(i8 zeroext, i32) local_unnamed_addr
+
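+;; As the char/uchar kernels above show, the signedness encoded in the
+;; mangled name selects the min/max opcode: signed types map to
+;; OpGroupNonUniformSMin/SMax and unsigned types to OpGroupNonUniformUMin/UMax,
+;; while add/mul share OpGroupNonUniformIAdd/IMul.
+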
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMin %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMax %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testClusteredArithmeticShort(i16 addrspace(1)* nocapture) local_unnamed_addr {
+ %2 = tail call spir_func signext i16 @_Z30sub_group_clustered_reduce_addsj(i16 signext 0, i32 2)
+ store i16 %2, i16 addrspace(1)* %0, align 2
+ %3 = tail call spir_func signext i16 @_Z30sub_group_clustered_reduce_mulsj(i16 signext 0, i32 2)
+ %4 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 1
+ store i16 %3, i16 addrspace(1)* %4, align 2
+ %5 = tail call spir_func signext i16 @_Z30sub_group_clustered_reduce_minsj(i16 signext 0, i32 2)
+ %6 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 2
+ store i16 %5, i16 addrspace(1)* %6, align 2
+ %7 = tail call spir_func signext i16 @_Z30sub_group_clustered_reduce_maxsj(i16 signext 0, i32 2)
+ %8 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 3
+ store i16 %7, i16 addrspace(1)* %8, align 2
+ ret void
+}
+
+declare dso_local spir_func signext i16 @_Z30sub_group_clustered_reduce_addsj(i16 signext, i32) local_unnamed_addr
+
+declare dso_local spir_func signext i16 @_Z30sub_group_clustered_reduce_mulsj(i16 signext, i32) local_unnamed_addr
+
+declare dso_local spir_func signext i16 @_Z30sub_group_clustered_reduce_minsj(i16 signext, i32) local_unnamed_addr
+
+declare dso_local spir_func signext i16 @_Z30sub_group_clustered_reduce_maxsj(i16 signext, i32) local_unnamed_addr
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMin %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMax %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testClusteredArithmeticUShort(i16 addrspace(1)* nocapture) local_unnamed_addr {
+ %2 = tail call spir_func zeroext i16 @_Z30sub_group_clustered_reduce_addtj(i16 zeroext 0, i32 2)
+ store i16 %2, i16 addrspace(1)* %0, align 2
+ %3 = tail call spir_func zeroext i16 @_Z30sub_group_clustered_reduce_multj(i16 zeroext 0, i32 2)
+ %4 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 1
+ store i16 %3, i16 addrspace(1)* %4, align 2
+ %5 = tail call spir_func zeroext i16 @_Z30sub_group_clustered_reduce_mintj(i16 zeroext 0, i32 2)
+ %6 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 2
+ store i16 %5, i16 addrspace(1)* %6, align 2
+ %7 = tail call spir_func zeroext i16 @_Z30sub_group_clustered_reduce_maxtj(i16 zeroext 0, i32 2)
+ %8 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 3
+ store i16 %7, i16 addrspace(1)* %8, align 2
+ ret void
+}
+
+declare dso_local spir_func zeroext i16 @_Z30sub_group_clustered_reduce_addtj(i16 zeroext, i32) local_unnamed_addr
+
+declare dso_local spir_func zeroext i16 @_Z30sub_group_clustered_reduce_multj(i16 zeroext, i32) local_unnamed_addr
+
+declare dso_local spir_func zeroext i16 @_Z30sub_group_clustered_reduce_mintj(i16 zeroext, i32) local_unnamed_addr
+
+declare dso_local spir_func zeroext i16 @_Z30sub_group_clustered_reduce_maxtj(i16 zeroext, i32) local_unnamed_addr
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMin %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMax %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testClusteredArithmeticInt(i32 addrspace(1)* nocapture) local_unnamed_addr {
+ %2 = tail call spir_func i32 @_Z30sub_group_clustered_reduce_addij(i32 0, i32 2)
+ store i32 %2, i32 addrspace(1)* %0, align 4
+ %3 = tail call spir_func i32 @_Z30sub_group_clustered_reduce_mulij(i32 0, i32 2)
+ %4 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 1
+ store i32 %3, i32 addrspace(1)* %4, align 4
+ %5 = tail call spir_func i32 @_Z30sub_group_clustered_reduce_minij(i32 0, i32 2)
+ %6 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 2
+ store i32 %5, i32 addrspace(1)* %6, align 4
+ %7 = tail call spir_func i32 @_Z30sub_group_clustered_reduce_maxij(i32 0, i32 2)
+ %8 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 3
+ store i32 %7, i32 addrspace(1)* %8, align 4
+ ret void
+}
+
+declare dso_local spir_func i32 @_Z30sub_group_clustered_reduce_addij(i32, i32) local_unnamed_addr
+
+declare dso_local spir_func i32 @_Z30sub_group_clustered_reduce_mulij(i32, i32) local_unnamed_addr
+
+declare dso_local spir_func i32 @_Z30sub_group_clustered_reduce_minij(i32, i32) local_unnamed_addr
+
+declare dso_local spir_func i32 @_Z30sub_group_clustered_reduce_maxij(i32, i32) local_unnamed_addr
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMin %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMax %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testClusteredArithmeticUInt(i32 addrspace(1)* nocapture) local_unnamed_addr {
+ %2 = tail call spir_func i32 @_Z30sub_group_clustered_reduce_addjj(i32 0, i32 2)
+ store i32 %2, i32 addrspace(1)* %0, align 4
+ %3 = tail call spir_func i32 @_Z30sub_group_clustered_reduce_muljj(i32 0, i32 2)
+ %4 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 1
+ store i32 %3, i32 addrspace(1)* %4, align 4
+ %5 = tail call spir_func i32 @_Z30sub_group_clustered_reduce_minjj(i32 0, i32 2)
+ %6 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 2
+ store i32 %5, i32 addrspace(1)* %6, align 4
+ %7 = tail call spir_func i32 @_Z30sub_group_clustered_reduce_maxjj(i32 0, i32 2)
+ %8 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 3
+ store i32 %7, i32 addrspace(1)* %8, align 4
+ ret void
+}
+
+declare dso_local spir_func i32 @_Z30sub_group_clustered_reduce_addjj(i32, i32) local_unnamed_addr
+
+declare dso_local spir_func i32 @_Z30sub_group_clustered_reduce_muljj(i32, i32) local_unnamed_addr
+
+declare dso_local spir_func i32 @_Z30sub_group_clustered_reduce_minjj(i32, i32) local_unnamed_addr
+
+declare dso_local spir_func i32 @_Z30sub_group_clustered_reduce_maxjj(i32, i32) local_unnamed_addr
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMin %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMax %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testClusteredArithmeticLong(i64 addrspace(1)* nocapture) local_unnamed_addr {
+ %2 = tail call spir_func i64 @_Z30sub_group_clustered_reduce_addlj(i64 0, i32 2)
+ store i64 %2, i64 addrspace(1)* %0, align 8
+ %3 = tail call spir_func i64 @_Z30sub_group_clustered_reduce_mullj(i64 0, i32 2)
+ %4 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 1
+ store i64 %3, i64 addrspace(1)* %4, align 8
+ %5 = tail call spir_func i64 @_Z30sub_group_clustered_reduce_minlj(i64 0, i32 2)
+ %6 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 2
+ store i64 %5, i64 addrspace(1)* %6, align 8
+ %7 = tail call spir_func i64 @_Z30sub_group_clustered_reduce_maxlj(i64 0, i32 2)
+ %8 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 3
+ store i64 %7, i64 addrspace(1)* %8, align 8
+ ret void
+}
+
+declare dso_local spir_func i64 @_Z30sub_group_clustered_reduce_addlj(i64, i32) local_unnamed_addr
+
+declare dso_local spir_func i64 @_Z30sub_group_clustered_reduce_mullj(i64, i32) local_unnamed_addr
+
+declare dso_local spir_func i64 @_Z30sub_group_clustered_reduce_minlj(i64, i32) local_unnamed_addr
+
+declare dso_local spir_func i64 @_Z30sub_group_clustered_reduce_maxlj(i64, i32) local_unnamed_addr
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMin %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMax %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testClusteredArithmeticULong(i64 addrspace(1)* nocapture) local_unnamed_addr {
+ %2 = tail call spir_func i64 @_Z30sub_group_clustered_reduce_addmj(i64 0, i32 2)
+ store i64 %2, i64 addrspace(1)* %0, align 8
+ %3 = tail call spir_func i64 @_Z30sub_group_clustered_reduce_mulmj(i64 0, i32 2)
+ %4 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 1
+ store i64 %3, i64 addrspace(1)* %4, align 8
+ %5 = tail call spir_func i64 @_Z30sub_group_clustered_reduce_minmj(i64 0, i32 2)
+ %6 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 2
+ store i64 %5, i64 addrspace(1)* %6, align 8
+ %7 = tail call spir_func i64 @_Z30sub_group_clustered_reduce_maxmj(i64 0, i32 2)
+ %8 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 3
+ store i64 %7, i64 addrspace(1)* %8, align 8
+ ret void
+}
+
+declare dso_local spir_func i64 @_Z30sub_group_clustered_reduce_addmj(i64, i32) local_unnamed_addr
+
+declare dso_local spir_func i64 @_Z30sub_group_clustered_reduce_mulmj(i64, i32) local_unnamed_addr
+
+declare dso_local spir_func i64 @_Z30sub_group_clustered_reduce_minmj(i64, i32) local_unnamed_addr
+
+declare dso_local spir_func i64 @_Z30sub_group_clustered_reduce_maxmj(i64, i32) local_unnamed_addr
+
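+;; The floating-point kernels (float, half, double) map to the FAdd/FMul/
+;; FMin/FMax group operations instead of the integer forms.
+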
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFAdd %[[#float]] %[[#ScopeSubgroup]] ClusteredReduce %[[#float_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMul %[[#float]] %[[#ScopeSubgroup]] ClusteredReduce %[[#float_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMin %[[#float]] %[[#ScopeSubgroup]] ClusteredReduce %[[#float_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMax %[[#float]] %[[#ScopeSubgroup]] ClusteredReduce %[[#float_0]] %[[#int_2]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testClusteredArithmeticFloat(float addrspace(1)* nocapture) local_unnamed_addr {
+ %2 = tail call spir_func float @_Z30sub_group_clustered_reduce_addfj(float 0.000000e+00, i32 2)
+ store float %2, float addrspace(1)* %0, align 4
+ %3 = tail call spir_func float @_Z30sub_group_clustered_reduce_mulfj(float 0.000000e+00, i32 2)
+ %4 = getelementptr inbounds float, float addrspace(1)* %0, i64 1
+ store float %3, float addrspace(1)* %4, align 4
+ %5 = tail call spir_func float @_Z30sub_group_clustered_reduce_minfj(float 0.000000e+00, i32 2)
+ %6 = getelementptr inbounds float, float addrspace(1)* %0, i64 2
+ store float %5, float addrspace(1)* %6, align 4
+ %7 = tail call spir_func float @_Z30sub_group_clustered_reduce_maxfj(float 0.000000e+00, i32 2)
+ %8 = getelementptr inbounds float, float addrspace(1)* %0, i64 3
+ store float %7, float addrspace(1)* %8, align 4
+ ret void
+}
+
+declare dso_local spir_func float @_Z30sub_group_clustered_reduce_addfj(float, i32) local_unnamed_addr
+
+declare dso_local spir_func float @_Z30sub_group_clustered_reduce_mulfj(float, i32) local_unnamed_addr
+
+declare dso_local spir_func float @_Z30sub_group_clustered_reduce_minfj(float, i32) local_unnamed_addr
+
+declare dso_local spir_func float @_Z30sub_group_clustered_reduce_maxfj(float, i32) local_unnamed_addr
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFAdd %[[#half]] %[[#ScopeSubgroup]] ClusteredReduce %[[#half_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMul %[[#half]] %[[#ScopeSubgroup]] ClusteredReduce %[[#half_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMin %[[#half]] %[[#ScopeSubgroup]] ClusteredReduce %[[#half_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMax %[[#half]] %[[#ScopeSubgroup]] ClusteredReduce %[[#half_0]] %[[#int_2]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testClusteredArithmeticHalf(half addrspace(1)* nocapture) local_unnamed_addr {
+ %2 = tail call spir_func half @_Z30sub_group_clustered_reduce_addDhj(half 0xH0000, i32 2)
+ store half %2, half addrspace(1)* %0, align 2
+ %3 = tail call spir_func half @_Z30sub_group_clustered_reduce_mulDhj(half 0xH0000, i32 2)
+ %4 = getelementptr inbounds half, half addrspace(1)* %0, i64 1
+ store half %3, half addrspace(1)* %4, align 2
+ %5 = tail call spir_func half @_Z30sub_group_clustered_reduce_minDhj(half 0xH0000, i32 2)
+ %6 = getelementptr inbounds half, half addrspace(1)* %0, i64 2
+ store half %5, half addrspace(1)* %6, align 2
+ %7 = tail call spir_func half @_Z30sub_group_clustered_reduce_maxDhj(half 0xH0000, i32 2)
+ %8 = getelementptr inbounds half, half addrspace(1)* %0, i64 3
+ store half %7, half addrspace(1)* %8, align 2
+ ret void
+}
+
+declare dso_local spir_func half @_Z30sub_group_clustered_reduce_addDhj(half, i32) local_unnamed_addr
+
+declare dso_local spir_func half @_Z30sub_group_clustered_reduce_mulDhj(half, i32) local_unnamed_addr
+
+declare dso_local spir_func half @_Z30sub_group_clustered_reduce_minDhj(half, i32) local_unnamed_addr
+
+declare dso_local spir_func half @_Z30sub_group_clustered_reduce_maxDhj(half, i32) local_unnamed_addr
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFAdd %[[#double]] %[[#ScopeSubgroup]] ClusteredReduce %[[#double_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMul %[[#double]] %[[#ScopeSubgroup]] ClusteredReduce %[[#double_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMin %[[#double]] %[[#ScopeSubgroup]] ClusteredReduce %[[#double_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMax %[[#double]] %[[#ScopeSubgroup]] ClusteredReduce %[[#double_0]] %[[#int_2]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testClusteredArithmeticDouble(double addrspace(1)* nocapture) local_unnamed_addr {
+ %2 = tail call spir_func double @_Z30sub_group_clustered_reduce_adddj(double 0.000000e+00, i32 2)
+ store double %2, double addrspace(1)* %0, align 8
+ %3 = tail call spir_func double @_Z30sub_group_clustered_reduce_muldj(double 0.000000e+00, i32 2)
+ %4 = getelementptr inbounds double, double addrspace(1)* %0, i64 1
+ store double %3, double addrspace(1)* %4, align 8
+ %5 = tail call spir_func double @_Z30sub_group_clustered_reduce_mindj(double 0.000000e+00, i32 2)
+ %6 = getelementptr inbounds double, double addrspace(1)* %0, i64 2
+ store double %5, double addrspace(1)* %6, align 8
+ %7 = tail call spir_func double @_Z30sub_group_clustered_reduce_maxdj(double 0.000000e+00, i32 2)
+ %8 = getelementptr inbounds double, double addrspace(1)* %0, i64 3
+ store double %7, double addrspace(1)* %8, align 8
+ ret void
+}
+
+declare dso_local spir_func double @_Z30sub_group_clustered_reduce_adddj(double, i32) local_unnamed_addr
+
+declare dso_local spir_func double @_Z30sub_group_clustered_reduce_muldj(double, i32) local_unnamed_addr
+
+declare dso_local spir_func double @_Z30sub_group_clustered_reduce_mindj(double, i32) local_unnamed_addr
+
+declare dso_local spir_func double @_Z30sub_group_clustered_reduce_maxdj(double, i32) local_unnamed_addr
+
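+;; The bitwise reductions below map and/or/xor to OpGroupNonUniformBitwiseAnd/
+;; Or/Xor at the integer type, while the logical_* builtins at the end of the
+;; file are checked as OpGroupNonUniformLogicalAnd/Or/Xor on bool, with the
+;; int argument collapsed to OpConstantFalse.
+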
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testClusteredBitwiseChar(i8 addrspace(1)* nocapture) local_unnamed_addr {
+ %2 = tail call spir_func signext i8 @_Z30sub_group_clustered_reduce_andcj(i8 signext 0, i32 2)
+ store i8 %2, i8 addrspace(1)* %0, align 1
+ %3 = tail call spir_func signext i8 @_Z29sub_group_clustered_reduce_orcj(i8 signext 0, i32 2)
+ %4 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 1
+ store i8 %3, i8 addrspace(1)* %4, align 1
+ %5 = tail call spir_func signext i8 @_Z30sub_group_clustered_reduce_xorcj(i8 signext 0, i32 2)
+ %6 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 2
+ store i8 %5, i8 addrspace(1)* %6, align 1
+ ret void
+}
+
+declare dso_local spir_func signext i8 @_Z30sub_group_clustered_reduce_andcj(i8 signext, i32) local_unnamed_addr
+
+declare dso_local spir_func signext i8 @_Z29sub_group_clustered_reduce_orcj(i8 signext, i32) local_unnamed_addr
+
+declare dso_local spir_func signext i8 @_Z30sub_group_clustered_reduce_xorcj(i8 signext, i32) local_unnamed_addr
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testClusteredBitwiseUChar(i8 addrspace(1)* nocapture) local_unnamed_addr {
+ %2 = tail call spir_func zeroext i8 @_Z30sub_group_clustered_reduce_andhj(i8 zeroext 0, i32 2)
+ store i8 %2, i8 addrspace(1)* %0, align 1
+ %3 = tail call spir_func zeroext i8 @_Z29sub_group_clustered_reduce_orhj(i8 zeroext 0, i32 2)
+ %4 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 1
+ store i8 %3, i8 addrspace(1)* %4, align 1
+ %5 = tail call spir_func zeroext i8 @_Z30sub_group_clustered_reduce_xorhj(i8 zeroext 0, i32 2)
+ %6 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 2
+ store i8 %5, i8 addrspace(1)* %6, align 1
+ ret void
+}
+
+declare dso_local spir_func zeroext i8 @_Z30sub_group_clustered_reduce_andhj(i8 zeroext, i32) local_unnamed_addr
+
+declare dso_local spir_func zeroext i8 @_Z29sub_group_clustered_reduce_orhj(i8 zeroext, i32) local_unnamed_addr
+
+declare dso_local spir_func zeroext i8 @_Z30sub_group_clustered_reduce_xorhj(i8 zeroext, i32) local_unnamed_addr
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testClusteredBitwiseShort(i16 addrspace(1)* nocapture) local_unnamed_addr {
+ %2 = tail call spir_func signext i16 @_Z30sub_group_clustered_reduce_andsj(i16 signext 0, i32 2)
+ store i16 %2, i16 addrspace(1)* %0, align 2
+ %3 = tail call spir_func signext i16 @_Z29sub_group_clustered_reduce_orsj(i16 signext 0, i32 2)
+ %4 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 1
+ store i16 %3, i16 addrspace(1)* %4, align 2
+ %5 = tail call spir_func signext i16 @_Z30sub_group_clustered_reduce_xorsj(i16 signext 0, i32 2)
+ %6 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 2
+ store i16 %5, i16 addrspace(1)* %6, align 2
+ ret void
+}
+
+declare dso_local spir_func signext i16 @_Z30sub_group_clustered_reduce_andsj(i16 signext, i32) local_unnamed_addr
+
+declare dso_local spir_func signext i16 @_Z29sub_group_clustered_reduce_orsj(i16 signext, i32) local_unnamed_addr
+
+declare dso_local spir_func signext i16 @_Z30sub_group_clustered_reduce_xorsj(i16 signext, i32) local_unnamed_addr
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testClusteredBitwiseUShort(i16 addrspace(1)* nocapture) local_unnamed_addr {
+ %2 = tail call spir_func zeroext i16 @_Z30sub_group_clustered_reduce_andtj(i16 zeroext 0, i32 2)
+ store i16 %2, i16 addrspace(1)* %0, align 2
+ %3 = tail call spir_func zeroext i16 @_Z29sub_group_clustered_reduce_ortj(i16 zeroext 0, i32 2)
+ %4 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 1
+ store i16 %3, i16 addrspace(1)* %4, align 2
+ %5 = tail call spir_func zeroext i16 @_Z30sub_group_clustered_reduce_xortj(i16 zeroext 0, i32 2)
+ %6 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 2
+ store i16 %5, i16 addrspace(1)* %6, align 2
+ ret void
+}
+
+declare dso_local spir_func zeroext i16 @_Z30sub_group_clustered_reduce_andtj(i16 zeroext, i32) local_unnamed_addr
+
+declare dso_local spir_func zeroext i16 @_Z29sub_group_clustered_reduce_ortj(i16 zeroext, i32) local_unnamed_addr
+
+declare dso_local spir_func zeroext i16 @_Z30sub_group_clustered_reduce_xortj(i16 zeroext, i32) local_unnamed_addr
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testClusteredBitwiseInt(i32 addrspace(1)* nocapture) local_unnamed_addr {
+ %2 = tail call spir_func i32 @_Z30sub_group_clustered_reduce_andij(i32 0, i32 2)
+ store i32 %2, i32 addrspace(1)* %0, align 4
+ %3 = tail call spir_func i32 @_Z29sub_group_clustered_reduce_orij(i32 0, i32 2)
+ %4 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 1
+ store i32 %3, i32 addrspace(1)* %4, align 4
+ %5 = tail call spir_func i32 @_Z30sub_group_clustered_reduce_xorij(i32 0, i32 2)
+ %6 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 2
+ store i32 %5, i32 addrspace(1)* %6, align 4
+ ret void
+}
+
+declare dso_local spir_func i32 @_Z30sub_group_clustered_reduce_andij(i32, i32) local_unnamed_addr
+
+declare dso_local spir_func i32 @_Z29sub_group_clustered_reduce_orij(i32, i32) local_unnamed_addr
+
+declare dso_local spir_func i32 @_Z30sub_group_clustered_reduce_xorij(i32, i32) local_unnamed_addr
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testClusteredBitwiseUInt(i32 addrspace(1)* nocapture) local_unnamed_addr {
+ %2 = tail call spir_func i32 @_Z30sub_group_clustered_reduce_andjj(i32 0, i32 2)
+ store i32 %2, i32 addrspace(1)* %0, align 4
+ %3 = tail call spir_func i32 @_Z29sub_group_clustered_reduce_orjj(i32 0, i32 2)
+ %4 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 1
+ store i32 %3, i32 addrspace(1)* %4, align 4
+ %5 = tail call spir_func i32 @_Z30sub_group_clustered_reduce_xorjj(i32 0, i32 2)
+ %6 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 2
+ store i32 %5, i32 addrspace(1)* %6, align 4
+ ret void
+}
+
+declare dso_local spir_func i32 @_Z30sub_group_clustered_reduce_andjj(i32, i32) local_unnamed_addr
+
+declare dso_local spir_func i32 @_Z29sub_group_clustered_reduce_orjj(i32, i32) local_unnamed_addr
+
+declare dso_local spir_func i32 @_Z30sub_group_clustered_reduce_xorjj(i32, i32) local_unnamed_addr
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testClusteredBitwiseLong(i64 addrspace(1)* nocapture) local_unnamed_addr {
+  store i64 %2, i64 addrspace(1)* %0, align 8
+  %3 = tail call spir_func i64 @_Z29sub_group_clustered_reduce_orlj(i64 0, i32 2)
+  %4 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 1
+  store i64 %3, i64 addrspace(1)* %4, align 8
+  %5 = tail call spir_func i64 @_Z30sub_group_clustered_reduce_xorlj(i64 0, i32 2)
+  %6 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 2
+  store i64 %5, i64 addrspace(1)* %6, align 8
+  ret void
+}
+
+declare dso_local spir_func i64 @_Z30sub_group_clustered_reduce_andlj(i64, i32) local_unnamed_addr
+
+declare dso_local spir_func i64 @_Z29sub_group_clustered_reduce_orlj(i64, i32) local_unnamed_addr
+
+declare dso_local spir_func i64 @_Z30sub_group_clustered_reduce_xorlj(i64, i32) local_unnamed_addr
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testClusteredBitwiseULong(i64 addrspace(1)* nocapture) local_unnamed_addr {
+  %2 = tail call spir_func i64 @_Z30sub_group_clustered_reduce_andmj(i64 0, i32 2)
+  store i64 %2, i64 addrspace(1)* %0, align 8
+  %3 = tail call spir_func i64 @_Z29sub_group_clustered_reduce_ormj(i64 0, i32 2)
+  %4 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 1
+  store i64 %3, i64 addrspace(1)* %4, align 8
+  %5 = tail call spir_func i64 @_Z30sub_group_clustered_reduce_xormj(i64 0, i32 2)
+  %6 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 2
+  store i64 %5, i64 addrspace(1)* %6, align 8
+  ret void
+}
+
+declare dso_local spir_func i64 @_Z30sub_group_clustered_reduce_andmj(i64, i32) local_unnamed_addr
+
+declare dso_local spir_func i64 @_Z29sub_group_clustered_reduce_ormj(i64, i32) local_unnamed_addr
+
+declare dso_local spir_func i64 @_Z30sub_group_clustered_reduce_xormj(i64, i32) local_unnamed_addr
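+
+;; The *_logical_* builtins below operate on boolean values even though the
+;; OpenCL-level arguments are int, so the expected result type is the bool id
+;; %[[#bool]] and the source operand is %[[#false]] rather than an integer zero.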
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformLogicalAnd %[[#bool]] %[[#ScopeSubgroup]] ClusteredReduce %[[#false]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformLogicalOr %[[#bool]] %[[#ScopeSubgroup]] ClusteredReduce %[[#false]] %[[#int_2]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformLogicalXor %[[#bool]] %[[#ScopeSubgroup]] ClusteredReduce %[[#false]] %[[#int_2]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testClusteredLogical(i32 addrspace(1)* nocapture) local_unnamed_addr {
+  %2 = tail call spir_func i32 @_Z38sub_group_clustered_reduce_logical_andij(i32 0, i32 2)
+  store i32 %2, i32 addrspace(1)* %0, align 4
+  %3 = tail call spir_func i32 @_Z37sub_group_clustered_reduce_logical_orij(i32 0, i32 2)
+  %4 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 1
+  store i32 %3, i32 addrspace(1)* %4, align 4
+  %5 = tail call spir_func i32 @_Z38sub_group_clustered_reduce_logical_xorij(i32 0, i32 2)
+  %6 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 2
+  store i32 %5, i32 addrspace(1)* %6, align 4
+  ret void
+}
+
+declare dso_local spir_func i32 @_Z38sub_group_clustered_reduce_logical_andij(i32, i32) local_unnamed_addr
+
+declare dso_local spir_func i32 @_Z37sub_group_clustered_reduce_logical_orij(i32, i32) local_unnamed_addr
+
+declare dso_local spir_func i32 @_Z38sub_group_clustered_reduce_logical_xorij(i32, i32) local_unnamed_addr
diff --git a/llvm/test/CodeGen/SPIRV/transcoding/sub_group_extended_types.ll b/llvm/test/CodeGen/SPIRV/transcoding/sub_group_extended_types.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/transcoding/sub_group_extended_types.ll
@@ -0,0 +1,1031 @@
+;; #pragma OPENCL EXTENSION cl_khr_subgroup_extended_types : enable
+;; #pragma OPENCL EXTENSION cl_khr_fp16 : enable
+;; #pragma OPENCL EXTENSION cl_khr_fp64 : enable
+;;
+;; kernel void testBroadcastChar()
+;; {
+;;     char16 v = 0;
+;;     v.s0 = sub_group_broadcast(v.s0, 0);
+;;     v.s01 = sub_group_broadcast(v.s01, 0);
+;;     v.s012 = sub_group_broadcast(v.s012, 0);
+;;     v.s0123 = sub_group_broadcast(v.s0123, 0);
+;;     v.s01234567 = sub_group_broadcast(v.s01234567, 0);
+;;     v = sub_group_broadcast(v, 0);
+;; }
+;;
+;; kernel void testBroadcastUChar()
+;; {
+;;     uchar16 v = 0;
+;;     v.s0 = sub_group_broadcast(v.s0, 0);
+;;     v.s01 = sub_group_broadcast(v.s01, 0);
+;;     v.s012 = sub_group_broadcast(v.s012, 0);
+;;     v.s0123 = sub_group_broadcast(v.s0123, 0);
+;;     v.s01234567 = sub_group_broadcast(v.s01234567, 0);
+;;     v = sub_group_broadcast(v, 0);
+;; }
+;;
+;; kernel void testBroadcastShort()
+;; {
+;;     short16 v = 0;
+;;     v.s0 = sub_group_broadcast(v.s0, 0);
+;;     v.s01 = sub_group_broadcast(v.s01, 0);
+;;     v.s012 = sub_group_broadcast(v.s012, 0);
+;;     v.s0123 = sub_group_broadcast(v.s0123, 0);
+;;     v.s01234567 = sub_group_broadcast(v.s01234567, 0);
+;;     v = sub_group_broadcast(v, 0);
+;; }
+;;
+;; kernel void testBroadcastUShort()
+;; {
+;;     ushort16 v = 0;
+;;     v.s0 = sub_group_broadcast(v.s0, 0);
+;;     v.s01 = sub_group_broadcast(v.s01, 0);
+;;     v.s012 = sub_group_broadcast(v.s012, 0);
+;;     v.s0123 = sub_group_broadcast(v.s0123, 0);
+;;     v.s01234567 = sub_group_broadcast(v.s01234567, 0);
+;;     v = sub_group_broadcast(v, 0);
+;; }
+;;
+;; kernel void testBroadcastInt()
+;; {
+;;     int16 v = 0;
+;;     v.s0 = sub_group_broadcast(v.s0, 0);
+;;     v.s01 = sub_group_broadcast(v.s01, 0);
+;;     v.s012 = sub_group_broadcast(v.s012, 0);
+;;     v.s0123 = sub_group_broadcast(v.s0123, 0);
+;;     v.s01234567 = sub_group_broadcast(v.s01234567, 0);
+;;     v = sub_group_broadcast(v, 0);
+;; }
+;;
+;; kernel void testBroadcastUInt()
+;; {
+;;     uint16 v = 0;
+;;     v.s0 = sub_group_broadcast(v.s0, 0);
+;;     v.s01 = sub_group_broadcast(v.s01, 0);
+;;     v.s012 = sub_group_broadcast(v.s012, 0);
+;;     v.s0123 = sub_group_broadcast(v.s0123, 0);
+;;     v.s01234567 = sub_group_broadcast(v.s01234567, 0);
+;;     v = sub_group_broadcast(v, 0);
+;; }
+;;
+;; kernel void testBroadcastLong()
+;; {
+;;     long16 v = 0;
+;;     v.s0 = sub_group_broadcast(v.s0, 0);
+;;     v.s01 = sub_group_broadcast(v.s01, 0);
+;;     v.s012 = sub_group_broadcast(v.s012, 0);
+;;     v.s0123 = sub_group_broadcast(v.s0123, 0);
+;;     v.s01234567 = sub_group_broadcast(v.s01234567, 0);
+;;     v = sub_group_broadcast(v, 0);
+;; }
+;;
+;; kernel void testBroadcastULong()
+;; {
+;;     ulong16 v = 0;
+;;     v.s0 = sub_group_broadcast(v.s0, 0);
+;;     v.s01 = sub_group_broadcast(v.s01, 0);
+;;     v.s012 = sub_group_broadcast(v.s012, 0);
+;;     v.s0123 = sub_group_broadcast(v.s0123, 0);
+;;     v.s01234567 = sub_group_broadcast(v.s01234567, 0);
+;;     v = sub_group_broadcast(v, 0);
+;; }
+;;
+;; kernel void testBroadcastFloat()
+;; {
+;;     float16 v = 0;
+;;     v.s0 = sub_group_broadcast(v.s0, 0);
+;;     v.s01 = sub_group_broadcast(v.s01, 0);
+;;     v.s012 = sub_group_broadcast(v.s012, 0);
+;;     v.s0123 = sub_group_broadcast(v.s0123, 0);
+;;     v.s01234567 = sub_group_broadcast(v.s01234567, 0);
+;;     v = sub_group_broadcast(v, 0);
+;; }
+;;
+;; kernel void testBroadcastHalf()
+;; {
+;;     half16 v = 0;
+;;     v.s0 = sub_group_broadcast(v.s0, 0);
+;;     v.s01 = sub_group_broadcast(v.s01, 0);
+;;     v.s012 = sub_group_broadcast(v.s012, 0);
+;;     v.s0123 = sub_group_broadcast(v.s0123, 0);
+;;     v.s01234567 = sub_group_broadcast(v.s01234567, 0);
+;;     v = sub_group_broadcast(v, 0);
+;; }
+;;
+;; kernel void testBroadcastDouble()
+;; {
+;;     double16 v = 0;
+;;     v.s0 = sub_group_broadcast(v.s0, 0);
+;;     v.s01 = sub_group_broadcast(v.s01, 0);
+;;     v.s012 = sub_group_broadcast(v.s012, 0);
+;;     v.s0123 = sub_group_broadcast(v.s0123, 0);
+;;     v.s01234567 = sub_group_broadcast(v.s01234567, 0);
+;;     v = sub_group_broadcast(v, 0);
+;; }
+;;
+;; kernel void testReduceScanChar(global char* dst)
+;; {
+;;     char v = 0;
+;;     dst[0] = sub_group_reduce_add(v);
+;;     dst[1] = sub_group_reduce_min(v);
+;;     dst[2] = sub_group_reduce_max(v);
+;;     dst[3] = sub_group_scan_inclusive_add(v);
+;;     dst[4] = sub_group_scan_inclusive_min(v);
+;;     dst[5] = sub_group_scan_inclusive_max(v);
+;;     dst[6] = sub_group_scan_exclusive_add(v);
+;;     dst[7] = sub_group_scan_exclusive_min(v);
+;;     dst[8] = sub_group_scan_exclusive_max(v);
+;; }
+;;
+;; kernel void testReduceScanUChar(global uchar* dst)
+;; {
+;;     uchar v = 0;
+;;     dst[0] = sub_group_reduce_add(v);
+;;     dst[1] = sub_group_reduce_min(v);
+;;     dst[2] = sub_group_reduce_max(v);
+;;     dst[3] = sub_group_scan_inclusive_add(v);
+;;     dst[4] = sub_group_scan_inclusive_min(v);
+;;     dst[5] = sub_group_scan_inclusive_max(v);
+;;     dst[6] = sub_group_scan_exclusive_add(v);
+;;     dst[7] = sub_group_scan_exclusive_min(v);
+;;     dst[8] = sub_group_scan_exclusive_max(v);
+;; }
+;;
+;; kernel void testReduceScanShort(global short* dst)
+;; {
+;;     short v = 0;
+;;     dst[0] = sub_group_reduce_add(v);
+;;     dst[1] = sub_group_reduce_min(v);
+;;     dst[2] = sub_group_reduce_max(v);
+;;     dst[3] = sub_group_scan_inclusive_add(v);
+;;     dst[4] = sub_group_scan_inclusive_min(v);
+;;     dst[5] = sub_group_scan_inclusive_max(v);
+;;     dst[6] = sub_group_scan_exclusive_add(v);
+;;     dst[7] = sub_group_scan_exclusive_min(v);
+;;     dst[8] = sub_group_scan_exclusive_max(v);
+;; }
+;;
+;; kernel void testReduceScanUShort(global ushort* dst)
+;; {
+;;     ushort v = 0;
+;;     dst[0] = sub_group_reduce_add(v);
+;;     dst[1] = sub_group_reduce_min(v);
+;;     dst[2] = sub_group_reduce_max(v);
+;;     dst[3] = sub_group_scan_inclusive_add(v);
+;;     dst[4] = sub_group_scan_inclusive_min(v);
+;;     dst[5] = sub_group_scan_inclusive_max(v);
+;;     dst[6] = sub_group_scan_exclusive_add(v);
+;;     dst[7] = sub_group_scan_exclusive_min(v);
+;;     dst[8] = sub_group_scan_exclusive_max(v);
+;; }
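+
+;; The declarations in this file use Itanium-mangled OpenCL builtin names:
+;; c = char, h = uchar, s = short, t = ushort, i = int, j = uint, l = long,
+;; m = ulong, Dh = half, f = float, d = double, and Dv<N>_ prefixes a vector
+;; of N elements of the mangled element type.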
+
+; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
+
+; CHECK-SPIRV-DAG: %[[#char:]] = OpTypeInt 8 0
+; CHECK-SPIRV-DAG: %[[#short:]] = OpTypeInt 16 0
+; CHECK-SPIRV-DAG: %[[#int:]] = OpTypeInt 32 0
+; CHECK-SPIRV-DAG: %[[#long:]] = OpTypeInt 64 0
+; CHECK-SPIRV-DAG: %[[#half:]] = OpTypeFloat 16
+; CHECK-SPIRV-DAG: %[[#float:]] = OpTypeFloat 32
+; CHECK-SPIRV-DAG: %[[#double:]] = OpTypeFloat 64
+
+; CHECK-SPIRV-DAG: %[[#char2:]] = OpTypeVector %[[#char]] 2
+; CHECK-SPIRV-DAG: %[[#char3:]] = OpTypeVector %[[#char]] 3
+; CHECK-SPIRV-DAG: %[[#char4:]] = OpTypeVector %[[#char]] 4
+; CHECK-SPIRV-DAG: %[[#char8:]] = OpTypeVector %[[#char]] 8
+; CHECK-SPIRV-DAG: %[[#char16:]] = OpTypeVector %[[#char]] 16
+
+; CHECK-SPIRV-DAG: %[[#short2:]] = OpTypeVector %[[#short]] 2
+; CHECK-SPIRV-DAG: %[[#short3:]] = OpTypeVector %[[#short]] 3
+; CHECK-SPIRV-DAG: %[[#short4:]] = OpTypeVector %[[#short]] 4
+; CHECK-SPIRV-DAG: 
%[[#short8:]] = OpTypeVector %[[#short]] 8 +; CHECK-SPIRV-DAG: %[[#short16:]] = OpTypeVector %[[#short]] 16 + +; CHECK-SPIRV-DAG: %[[#int2:]] = OpTypeVector %[[#int]] 2 +; CHECK-SPIRV-DAG: %[[#int3:]] = OpTypeVector %[[#int]] 3 +; CHECK-SPIRV-DAG: %[[#int4:]] = OpTypeVector %[[#int]] 4 +; CHECK-SPIRV-DAG: %[[#int8:]] = OpTypeVector %[[#int]] 8 +; CHECK-SPIRV-DAG: %[[#int16:]] = OpTypeVector %[[#int]] 16 + +; CHECK-SPIRV-DAG: %[[#long2:]] = OpTypeVector %[[#long]] 2 +; CHECK-SPIRV-DAG: %[[#long3:]] = OpTypeVector %[[#long]] 3 +; CHECK-SPIRV-DAG: %[[#long4:]] = OpTypeVector %[[#long]] 4 +; CHECK-SPIRV-DAG: %[[#long8:]] = OpTypeVector %[[#long]] 8 +; CHECK-SPIRV-DAG: %[[#long16:]] = OpTypeVector %[[#long]] 16 + +; CHECK-SPIRV-DAG: %[[#float2:]] = OpTypeVector %[[#float]] 2 +; CHECK-SPIRV-DAG: %[[#float3:]] = OpTypeVector %[[#float]] 3 +; CHECK-SPIRV-DAG: %[[#float4:]] = OpTypeVector %[[#float]] 4 +; CHECK-SPIRV-DAG: %[[#float8:]] = OpTypeVector %[[#float]] 8 +; CHECK-SPIRV-DAG: %[[#float16:]] = OpTypeVector %[[#float]] 16 + +; CHECK-SPIRV-DAG: %[[#half2:]] = OpTypeVector %[[#half]] 2 +; CHECK-SPIRV-DAG: %[[#half3:]] = OpTypeVector %[[#half]] 3 +; CHECK-SPIRV-DAG: %[[#half4:]] = OpTypeVector %[[#half]] 4 +; CHECK-SPIRV-DAG: %[[#half8:]] = OpTypeVector %[[#half]] 8 +; CHECK-SPIRV-DAG: %[[#half16:]] = OpTypeVector %[[#half]] 16 + +; CHECK-SPIRV-DAG: %[[#double2:]] = OpTypeVector %[[#double]] 2 +; CHECK-SPIRV-DAG: %[[#double3:]] = OpTypeVector %[[#double]] 3 +; CHECK-SPIRV-DAG: %[[#double4:]] = OpTypeVector %[[#double]] 4 +; CHECK-SPIRV-DAG: %[[#double8:]] = OpTypeVector %[[#double]] 8 +; CHECK-SPIRV-DAG: %[[#double16:]] = OpTypeVector %[[#double]] 16 + +; CHECK-SPIRV-DAG: %[[#ScopeSubgroup:]] = OpConstant %[[#int]] 3 +; CHECK-SPIRV-DAG: %[[#char_0:]] = OpConstant %[[#char]] 0 +; CHECK-SPIRV-DAG: %[[#short_0:]] = OpConstant %[[#short]] 0 +; CHECK-SPIRV-DAG: %[[#int_0:]] = OpConstant %[[#int]] 0 +; CHECK-SPIRV-DAG: %[[#long_0:]] = OpConstantNull %[[#long]] +; CHECK-SPIRV-DAG: %[[#half_0:]] = OpConstant %[[#half]] 0 +; CHECK-SPIRV-DAG: %[[#float_0:]] = OpConstant %[[#float]] 0 +; CHECK-SPIRV-DAG: %[[#double_0:]] = OpConstant %[[#double]] 0 + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#char]] %[[#ScopeSubgroup]] %[[#char_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#char2_0:]] = OpVectorShuffle %[[#char2]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#char2]] %[[#ScopeSubgroup]] %[[#char2_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#char3_0:]] = OpVectorShuffle %[[#char3]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#char3]] %[[#ScopeSubgroup]] %[[#char3_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#char4_0:]] = OpVectorShuffle %[[#char4]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#char4]] %[[#ScopeSubgroup]] %[[#char4_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#char8_0:]] = OpVectorShuffle %[[#char8]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#char8]] %[[#ScopeSubgroup]] %[[#char8_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#char16]] +; CHECK-SPIRV: %[[#char16_0:]] = OpVectorShuffle %[[#char16]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#char16]] %[[#ScopeSubgroup]] %[[#char16_0]] %[[#int_0]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testBroadcastChar() local_unnamed_addr { + %1 = tail call spir_func signext i8 @_Z19sub_group_broadcastcj(i8 signext 0, i32 0) + %2 = insertelement <16 x i8> , i8 %1, i64 0 + %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <2 x i32> + %4 = tail call spir_func <2 x i8> @_Z19sub_group_broadcastDv2_cj(<2 x i8> %3, i32 0) + %5 
= shufflevector <2 x i8> %4, <2 x i8> undef, <16 x i32> + %6 = shufflevector <16 x i8> %5, <16 x i8> %2, <16 x i32> + %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <3 x i32> + %8 = tail call spir_func <3 x i8> @_Z19sub_group_broadcastDv3_cj(<3 x i8> %7, i32 0) + %9 = shufflevector <3 x i8> %8, <3 x i8> undef, <16 x i32> + %10 = shufflevector <16 x i8> %9, <16 x i8> %6, <16 x i32> + %11 = shufflevector <16 x i8> %10, <16 x i8> undef, <4 x i32> + %12 = tail call spir_func <4 x i8> @_Z19sub_group_broadcastDv4_cj(<4 x i8> %11, i32 0) + %13 = shufflevector <4 x i8> %12, <4 x i8> undef, <16 x i32> + %14 = shufflevector <16 x i8> %13, <16 x i8> %10, <16 x i32> + %15 = shufflevector <16 x i8> %14, <16 x i8> undef, <8 x i32> + %16 = tail call spir_func <8 x i8> @_Z19sub_group_broadcastDv8_cj(<8 x i8> %15, i32 0) + %17 = shufflevector <8 x i8> %16, <8 x i8> undef, <16 x i32> + %18 = shufflevector <16 x i8> %17, <16 x i8> %14, <16 x i32> + %19 = tail call spir_func <16 x i8> @_Z19sub_group_broadcastDv16_cj(<16 x i8> %18, i32 0) + ret void +} + +declare dso_local spir_func signext i8 @_Z19sub_group_broadcastcj(i8 signext, i32) local_unnamed_addr + +declare dso_local spir_func <2 x i8> @_Z19sub_group_broadcastDv2_cj(<2 x i8>, i32) local_unnamed_addr + +declare dso_local spir_func <3 x i8> @_Z19sub_group_broadcastDv3_cj(<3 x i8>, i32) local_unnamed_addr + +declare dso_local spir_func <4 x i8> @_Z19sub_group_broadcastDv4_cj(<4 x i8>, i32) local_unnamed_addr + +declare dso_local spir_func <8 x i8> @_Z19sub_group_broadcastDv8_cj(<8 x i8>, i32) local_unnamed_addr + +declare dso_local spir_func <16 x i8> @_Z19sub_group_broadcastDv16_cj(<16 x i8>, i32) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#char]] %[[#ScopeSubgroup]] %[[#char_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#char2_0:]] = OpVectorShuffle %[[#char2]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#char2]] %[[#ScopeSubgroup]] %[[#char2_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#char3_0:]] = OpVectorShuffle %[[#char3]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#char3]] %[[#ScopeSubgroup]] %[[#char3_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#char4_0:]] = OpVectorShuffle %[[#char4]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#char4]] %[[#ScopeSubgroup]] %[[#char4_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#char8_0:]] = OpVectorShuffle %[[#char8]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#char8]] %[[#ScopeSubgroup]] %[[#char8_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#char16]] +; CHECK-SPIRV: %[[#char16_0:]] = OpVectorShuffle %[[#char16]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#char16]] %[[#ScopeSubgroup]] %[[#char16_0]] %[[#int_0]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testBroadcastUChar() local_unnamed_addr { + %1 = tail call spir_func zeroext i8 @_Z19sub_group_broadcasthj(i8 zeroext 0, i32 0) + %2 = insertelement <16 x i8> , i8 %1, i64 0 + %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <2 x i32> + %4 = tail call spir_func <2 x i8> @_Z19sub_group_broadcastDv2_hj(<2 x i8> %3, i32 0) + %5 = shufflevector <2 x i8> %4, <2 x i8> undef, <16 x i32> + %6 = shufflevector <16 x i8> %5, <16 x i8> %2, <16 x i32> + %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <3 x i32> + %8 = tail call spir_func <3 x i8> @_Z19sub_group_broadcastDv3_hj(<3 x i8> %7, i32 0) + %9 = shufflevector <3 x i8> %8, <3 x i8> undef, <16 x i32> + %10 = shufflevector <16 x i8> %9, <16 x i8> %6, <16 x i32> + %11 = shufflevector <16 x i8> %10, <16 x i8> undef, <4 x i32> + %12 = tail call spir_func <4 x 
i8> @_Z19sub_group_broadcastDv4_hj(<4 x i8> %11, i32 0) + %13 = shufflevector <4 x i8> %12, <4 x i8> undef, <16 x i32> + %14 = shufflevector <16 x i8> %13, <16 x i8> %10, <16 x i32> + %15 = shufflevector <16 x i8> %14, <16 x i8> undef, <8 x i32> + %16 = tail call spir_func <8 x i8> @_Z19sub_group_broadcastDv8_hj(<8 x i8> %15, i32 0) + %17 = shufflevector <8 x i8> %16, <8 x i8> undef, <16 x i32> + %18 = shufflevector <16 x i8> %17, <16 x i8> %14, <16 x i32> + %19 = tail call spir_func <16 x i8> @_Z19sub_group_broadcastDv16_hj(<16 x i8> %18, i32 0) + ret void +} + +declare dso_local spir_func zeroext i8 @_Z19sub_group_broadcasthj(i8 zeroext, i32) local_unnamed_addr + +declare dso_local spir_func <2 x i8> @_Z19sub_group_broadcastDv2_hj(<2 x i8>, i32) local_unnamed_addr + +declare dso_local spir_func <3 x i8> @_Z19sub_group_broadcastDv3_hj(<3 x i8>, i32) local_unnamed_addr + +declare dso_local spir_func <4 x i8> @_Z19sub_group_broadcastDv4_hj(<4 x i8>, i32) local_unnamed_addr + +declare dso_local spir_func <8 x i8> @_Z19sub_group_broadcastDv8_hj(<8 x i8>, i32) local_unnamed_addr + +declare dso_local spir_func <16 x i8> @_Z19sub_group_broadcastDv16_hj(<16 x i8>, i32) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#short]] %[[#ScopeSubgroup]] %[[#short_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#short2_0:]] = OpVectorShuffle %[[#short2]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#short2]] %[[#ScopeSubgroup]] %[[#short2_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#short3_0:]] = OpVectorShuffle %[[#short3]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#short3]] %[[#ScopeSubgroup]] %[[#short3_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#short4_0:]] = OpVectorShuffle %[[#short4]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#short4]] %[[#ScopeSubgroup]] %[[#short4_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#short8_0:]] = OpVectorShuffle %[[#short8]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#short8]] %[[#ScopeSubgroup]] %[[#short8_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#short16]] +; CHECK-SPIRV: %[[#short16_0:]] = OpVectorShuffle %[[#short16]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#short16]] %[[#ScopeSubgroup]] %[[#short16_0]] %[[#int_0]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testBroadcastShort() local_unnamed_addr { + %1 = tail call spir_func signext i16 @_Z19sub_group_broadcastsj(i16 signext 0, i32 0) + %2 = insertelement <16 x i16> , i16 %1, i64 0 + %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <2 x i32> + %4 = tail call spir_func <2 x i16> @_Z19sub_group_broadcastDv2_sj(<2 x i16> %3, i32 0) + %5 = shufflevector <2 x i16> %4, <2 x i16> undef, <16 x i32> + %6 = shufflevector <16 x i16> %5, <16 x i16> %2, <16 x i32> + %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <3 x i32> + %8 = tail call spir_func <3 x i16> @_Z19sub_group_broadcastDv3_sj(<3 x i16> %7, i32 0) + %9 = shufflevector <3 x i16> %8, <3 x i16> undef, <16 x i32> + %10 = shufflevector <16 x i16> %9, <16 x i16> %6, <16 x i32> + %11 = shufflevector <16 x i16> %10, <16 x i16> undef, <4 x i32> + %12 = tail call spir_func <4 x i16> @_Z19sub_group_broadcastDv4_sj(<4 x i16> %11, i32 0) + %13 = shufflevector <4 x i16> %12, <4 x i16> undef, <16 x i32> + %14 = shufflevector <16 x i16> %13, <16 x i16> %10, <16 x i32> + %15 = shufflevector <16 x i16> %14, <16 x i16> undef, <8 x i32> + %16 = tail call spir_func <8 x i16> @_Z19sub_group_broadcastDv8_sj(<8 x i16> %15, i32 0) + %17 = shufflevector <8 x i16> %16, <8 x i16> undef, <16 x i32> + %18 = shufflevector <16 x 
i16> %17, <16 x i16> %14, <16 x i32> + %19 = tail call spir_func <16 x i16> @_Z19sub_group_broadcastDv16_sj(<16 x i16> %18, i32 0) + ret void +} + +declare dso_local spir_func signext i16 @_Z19sub_group_broadcastsj(i16 signext, i32) local_unnamed_addr + +declare dso_local spir_func <2 x i16> @_Z19sub_group_broadcastDv2_sj(<2 x i16>, i32) local_unnamed_addr + +declare dso_local spir_func <3 x i16> @_Z19sub_group_broadcastDv3_sj(<3 x i16>, i32) local_unnamed_addr + +declare dso_local spir_func <4 x i16> @_Z19sub_group_broadcastDv4_sj(<4 x i16>, i32) local_unnamed_addr + +declare dso_local spir_func <8 x i16> @_Z19sub_group_broadcastDv8_sj(<8 x i16>, i32) local_unnamed_addr + +declare dso_local spir_func <16 x i16> @_Z19sub_group_broadcastDv16_sj(<16 x i16>, i32) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#short]] %[[#ScopeSubgroup]] %[[#short_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#short2_0:]] = OpVectorShuffle %[[#short2]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#short2]] %[[#ScopeSubgroup]] %[[#short2_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#short3_0:]] = OpVectorShuffle %[[#short3]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#short3]] %[[#ScopeSubgroup]] %[[#short3_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#short4_0:]] = OpVectorShuffle %[[#short4]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#short4]] %[[#ScopeSubgroup]] %[[#short4_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#short8_0:]] = OpVectorShuffle %[[#short8]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#short8]] %[[#ScopeSubgroup]] %[[#short8_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#short16]] +; CHECK-SPIRV: %[[#short16_0:]] = OpVectorShuffle %[[#short16]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#short16]] %[[#ScopeSubgroup]] %[[#short16_0]] %[[#int_0]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testBroadcastUShort() local_unnamed_addr { + %1 = tail call spir_func zeroext i16 @_Z19sub_group_broadcasttj(i16 zeroext 0, i32 0) + %2 = insertelement <16 x i16> , i16 %1, i64 0 + %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <2 x i32> + %4 = tail call spir_func <2 x i16> @_Z19sub_group_broadcastDv2_tj(<2 x i16> %3, i32 0) + %5 = shufflevector <2 x i16> %4, <2 x i16> undef, <16 x i32> + %6 = shufflevector <16 x i16> %5, <16 x i16> %2, <16 x i32> + %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <3 x i32> + %8 = tail call spir_func <3 x i16> @_Z19sub_group_broadcastDv3_tj(<3 x i16> %7, i32 0) + %9 = shufflevector <3 x i16> %8, <3 x i16> undef, <16 x i32> + %10 = shufflevector <16 x i16> %9, <16 x i16> %6, <16 x i32> + %11 = shufflevector <16 x i16> %10, <16 x i16> undef, <4 x i32> + %12 = tail call spir_func <4 x i16> @_Z19sub_group_broadcastDv4_tj(<4 x i16> %11, i32 0) + %13 = shufflevector <4 x i16> %12, <4 x i16> undef, <16 x i32> + %14 = shufflevector <16 x i16> %13, <16 x i16> %10, <16 x i32> + %15 = shufflevector <16 x i16> %14, <16 x i16> undef, <8 x i32> + %16 = tail call spir_func <8 x i16> @_Z19sub_group_broadcastDv8_tj(<8 x i16> %15, i32 0) + %17 = shufflevector <8 x i16> %16, <8 x i16> undef, <16 x i32> + %18 = shufflevector <16 x i16> %17, <16 x i16> %14, <16 x i32> + %19 = tail call spir_func <16 x i16> @_Z19sub_group_broadcastDv16_tj(<16 x i16> %18, i32 0) + ret void +} + +declare dso_local spir_func zeroext i16 @_Z19sub_group_broadcasttj(i16 zeroext, i32) local_unnamed_addr + +declare dso_local spir_func <2 x i16> @_Z19sub_group_broadcastDv2_tj(<2 x i16>, i32) local_unnamed_addr + +declare dso_local spir_func <3 x i16> 
@_Z19sub_group_broadcastDv3_tj(<3 x i16>, i32) local_unnamed_addr + +declare dso_local spir_func <4 x i16> @_Z19sub_group_broadcastDv4_tj(<4 x i16>, i32) local_unnamed_addr + +declare dso_local spir_func <8 x i16> @_Z19sub_group_broadcastDv8_tj(<8 x i16>, i32) local_unnamed_addr + +declare dso_local spir_func <16 x i16> @_Z19sub_group_broadcastDv16_tj(<16 x i16>, i32) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#int]] %[[#ScopeSubgroup]] %[[#int_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#int2_0:]] = OpVectorShuffle %[[#int2]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#int2]] %[[#ScopeSubgroup]] %[[#int2_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#int3_0:]] = OpVectorShuffle %[[#int3]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#int3]] %[[#ScopeSubgroup]] %[[#int3_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#int4_0:]] = OpVectorShuffle %[[#int4]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#int4]] %[[#ScopeSubgroup]] %[[#int4_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#int8_0:]] = OpVectorShuffle %[[#int8]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#int8]] %[[#ScopeSubgroup]] %[[#int8_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#int16]] +; CHECK-SPIRV: %[[#int16_0:]] = OpVectorShuffle %[[#int16]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#int16]] %[[#ScopeSubgroup]] %[[#int16_0]] %[[#int_0]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testBroadcastInt() local_unnamed_addr { + %1 = tail call spir_func i32 @_Z19sub_group_broadcastij(i32 0, i32 0) + %2 = insertelement <16 x i32> , i32 %1, i64 0 + %3 = shufflevector <16 x i32> %2, <16 x i32> undef, <2 x i32> + %4 = tail call spir_func <2 x i32> @_Z19sub_group_broadcastDv2_ij(<2 x i32> %3, i32 0) + %5 = shufflevector <2 x i32> %4, <2 x i32> undef, <16 x i32> + %6 = shufflevector <16 x i32> %5, <16 x i32> %2, <16 x i32> + %7 = shufflevector <16 x i32> %6, <16 x i32> undef, <3 x i32> + %8 = tail call spir_func <3 x i32> @_Z19sub_group_broadcastDv3_ij(<3 x i32> %7, i32 0) + %9 = shufflevector <3 x i32> %8, <3 x i32> undef, <16 x i32> + %10 = shufflevector <16 x i32> %9, <16 x i32> %6, <16 x i32> + %11 = shufflevector <16 x i32> %10, <16 x i32> undef, <4 x i32> + %12 = tail call spir_func <4 x i32> @_Z19sub_group_broadcastDv4_ij(<4 x i32> %11, i32 0) + %13 = shufflevector <4 x i32> %12, <4 x i32> undef, <16 x i32> + %14 = shufflevector <16 x i32> %13, <16 x i32> %10, <16 x i32> + %15 = shufflevector <16 x i32> %14, <16 x i32> undef, <8 x i32> + %16 = tail call spir_func <8 x i32> @_Z19sub_group_broadcastDv8_ij(<8 x i32> %15, i32 0) + %17 = shufflevector <8 x i32> %16, <8 x i32> undef, <16 x i32> + %18 = shufflevector <16 x i32> %17, <16 x i32> %14, <16 x i32> + %19 = tail call spir_func <16 x i32> @_Z19sub_group_broadcastDv16_ij(<16 x i32> %18, i32 0) + ret void +} + +declare dso_local spir_func i32 @_Z19sub_group_broadcastij(i32, i32) local_unnamed_addr + +declare dso_local spir_func <2 x i32> @_Z19sub_group_broadcastDv2_ij(<2 x i32>, i32) local_unnamed_addr + +declare dso_local spir_func <3 x i32> @_Z19sub_group_broadcastDv3_ij(<3 x i32>, i32) local_unnamed_addr + +declare dso_local spir_func <4 x i32> @_Z19sub_group_broadcastDv4_ij(<4 x i32>, i32) local_unnamed_addr + +declare dso_local spir_func <8 x i32> @_Z19sub_group_broadcastDv8_ij(<8 x i32>, i32) local_unnamed_addr + +declare dso_local spir_func <16 x i32> @_Z19sub_group_broadcastDv16_ij(<16 x i32>, i32) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#int]] %[[#ScopeSubgroup]] 
%[[#int_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#int2_0:]] = OpVectorShuffle %[[#int2]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#int2]] %[[#ScopeSubgroup]] %[[#int2_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#int3_0:]] = OpVectorShuffle %[[#int3]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#int3]] %[[#ScopeSubgroup]] %[[#int3_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#int4_0:]] = OpVectorShuffle %[[#int4]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#int4]] %[[#ScopeSubgroup]] %[[#int4_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#int8_0:]] = OpVectorShuffle %[[#int8]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#int8]] %[[#ScopeSubgroup]] %[[#int8_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#int16]] +; CHECK-SPIRV: %[[#int16_0:]] = OpVectorShuffle %[[#int16]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#int16]] %[[#ScopeSubgroup]] %[[#int16_0]] %[[#int_0]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testBroadcastUInt() local_unnamed_addr { + %1 = tail call spir_func i32 @_Z19sub_group_broadcastjj(i32 0, i32 0) + %2 = insertelement <16 x i32> , i32 %1, i64 0 + %3 = shufflevector <16 x i32> %2, <16 x i32> undef, <2 x i32> + %4 = tail call spir_func <2 x i32> @_Z19sub_group_broadcastDv2_jj(<2 x i32> %3, i32 0) + %5 = shufflevector <2 x i32> %4, <2 x i32> undef, <16 x i32> + %6 = shufflevector <16 x i32> %5, <16 x i32> %2, <16 x i32> + %7 = shufflevector <16 x i32> %6, <16 x i32> undef, <3 x i32> + %8 = tail call spir_func <3 x i32> @_Z19sub_group_broadcastDv3_jj(<3 x i32> %7, i32 0) + %9 = shufflevector <3 x i32> %8, <3 x i32> undef, <16 x i32> + %10 = shufflevector <16 x i32> %9, <16 x i32> %6, <16 x i32> + %11 = shufflevector <16 x i32> %10, <16 x i32> undef, <4 x i32> + %12 = tail call spir_func <4 x i32> @_Z19sub_group_broadcastDv4_jj(<4 x i32> %11, i32 0) + %13 = shufflevector <4 x i32> %12, <4 x i32> undef, <16 x i32> + %14 = shufflevector <16 x i32> %13, <16 x i32> %10, <16 x i32> + %15 = shufflevector <16 x i32> %14, <16 x i32> undef, <8 x i32> + %16 = tail call spir_func <8 x i32> @_Z19sub_group_broadcastDv8_jj(<8 x i32> %15, i32 0) + %17 = shufflevector <8 x i32> %16, <8 x i32> undef, <16 x i32> + %18 = shufflevector <16 x i32> %17, <16 x i32> %14, <16 x i32> + %19 = tail call spir_func <16 x i32> @_Z19sub_group_broadcastDv16_jj(<16 x i32> %18, i32 0) + ret void +} + +declare dso_local spir_func i32 @_Z19sub_group_broadcastjj(i32, i32) local_unnamed_addr + +declare dso_local spir_func <2 x i32> @_Z19sub_group_broadcastDv2_jj(<2 x i32>, i32) local_unnamed_addr + +declare dso_local spir_func <3 x i32> @_Z19sub_group_broadcastDv3_jj(<3 x i32>, i32) local_unnamed_addr + +declare dso_local spir_func <4 x i32> @_Z19sub_group_broadcastDv4_jj(<4 x i32>, i32) local_unnamed_addr + +declare dso_local spir_func <8 x i32> @_Z19sub_group_broadcastDv8_jj(<8 x i32>, i32) local_unnamed_addr + +declare dso_local spir_func <16 x i32> @_Z19sub_group_broadcastDv16_jj(<16 x i32>, i32) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#long]] %[[#ScopeSubgroup]] %[[#long_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#long2_0:]] = OpVectorShuffle %[[#long2]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#long2]] %[[#ScopeSubgroup]] %[[#long2_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#long3_0:]] = OpVectorShuffle %[[#long3]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#long3]] %[[#ScopeSubgroup]] %[[#long3_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#long4_0:]] = OpVectorShuffle %[[#long4]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#long4]] %[[#ScopeSubgroup]] 
%[[#long4_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#long8_0:]] = OpVectorShuffle %[[#long8]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#long8]] %[[#ScopeSubgroup]] %[[#long8_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#long16]] +; CHECK-SPIRV: %[[#long16_0:]] = OpVectorShuffle %[[#long16]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#long16]] %[[#ScopeSubgroup]] %[[#long16_0]] %[[#int_0]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testBroadcastLong() local_unnamed_addr { + %1 = tail call spir_func i64 @_Z19sub_group_broadcastlj(i64 0, i32 0) + %2 = insertelement <16 x i64> , i64 %1, i64 0 + %3 = shufflevector <16 x i64> %2, <16 x i64> undef, <2 x i32> + %4 = tail call spir_func <2 x i64> @_Z19sub_group_broadcastDv2_lj(<2 x i64> %3, i32 0) + %5 = shufflevector <2 x i64> %4, <2 x i64> undef, <16 x i32> + %6 = shufflevector <16 x i64> %5, <16 x i64> %2, <16 x i32> + %7 = shufflevector <16 x i64> %6, <16 x i64> undef, <3 x i32> + %8 = tail call spir_func <3 x i64> @_Z19sub_group_broadcastDv3_lj(<3 x i64> %7, i32 0) + %9 = shufflevector <3 x i64> %8, <3 x i64> undef, <16 x i32> + %10 = shufflevector <16 x i64> %9, <16 x i64> %6, <16 x i32> + %11 = shufflevector <16 x i64> %10, <16 x i64> undef, <4 x i32> + %12 = tail call spir_func <4 x i64> @_Z19sub_group_broadcastDv4_lj(<4 x i64> %11, i32 0) + %13 = shufflevector <4 x i64> %12, <4 x i64> undef, <16 x i32> + %14 = shufflevector <16 x i64> %13, <16 x i64> %10, <16 x i32> + %15 = shufflevector <16 x i64> %14, <16 x i64> undef, <8 x i32> + %16 = tail call spir_func <8 x i64> @_Z19sub_group_broadcastDv8_lj(<8 x i64> %15, i32 0) + %17 = shufflevector <8 x i64> %16, <8 x i64> undef, <16 x i32> + %18 = shufflevector <16 x i64> %17, <16 x i64> %14, <16 x i32> + %19 = tail call spir_func <16 x i64> @_Z19sub_group_broadcastDv16_lj(<16 x i64> %18, i32 0) + ret void +} + +declare dso_local spir_func i64 @_Z19sub_group_broadcastlj(i64, i32) local_unnamed_addr + +declare dso_local spir_func <2 x i64> @_Z19sub_group_broadcastDv2_lj(<2 x i64>, i32) local_unnamed_addr + +declare dso_local spir_func <3 x i64> @_Z19sub_group_broadcastDv3_lj(<3 x i64>, i32) local_unnamed_addr + +declare dso_local spir_func <4 x i64> @_Z19sub_group_broadcastDv4_lj(<4 x i64>, i32) local_unnamed_addr + +declare dso_local spir_func <8 x i64> @_Z19sub_group_broadcastDv8_lj(<8 x i64>, i32) local_unnamed_addr + +declare dso_local spir_func <16 x i64> @_Z19sub_group_broadcastDv16_lj(<16 x i64>, i32) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#long]] %[[#ScopeSubgroup]] %[[#long_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#long2_0:]] = OpVectorShuffle %[[#long2]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#long2]] %[[#ScopeSubgroup]] %[[#long2_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#long3_0:]] = OpVectorShuffle %[[#long3]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#long3]] %[[#ScopeSubgroup]] %[[#long3_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#long4_0:]] = OpVectorShuffle %[[#long4]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#long4]] %[[#ScopeSubgroup]] %[[#long4_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#long8_0:]] = OpVectorShuffle %[[#long8]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#long8]] %[[#ScopeSubgroup]] %[[#long8_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#long16]] +; CHECK-SPIRV: %[[#long16_0:]] = OpVectorShuffle %[[#long16]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#long16]] %[[#ScopeSubgroup]] %[[#long16_0]] %[[#int_0]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel 
void @testBroadcastULong() local_unnamed_addr { + %1 = tail call spir_func i64 @_Z19sub_group_broadcastmj(i64 0, i32 0) + %2 = insertelement <16 x i64> , i64 %1, i64 0 + %3 = shufflevector <16 x i64> %2, <16 x i64> undef, <2 x i32> + %4 = tail call spir_func <2 x i64> @_Z19sub_group_broadcastDv2_mj(<2 x i64> %3, i32 0) + %5 = shufflevector <2 x i64> %4, <2 x i64> undef, <16 x i32> + %6 = shufflevector <16 x i64> %5, <16 x i64> %2, <16 x i32> + %7 = shufflevector <16 x i64> %6, <16 x i64> undef, <3 x i32> + %8 = tail call spir_func <3 x i64> @_Z19sub_group_broadcastDv3_mj(<3 x i64> %7, i32 0) + %9 = shufflevector <3 x i64> %8, <3 x i64> undef, <16 x i32> + %10 = shufflevector <16 x i64> %9, <16 x i64> %6, <16 x i32> + %11 = shufflevector <16 x i64> %10, <16 x i64> undef, <4 x i32> + %12 = tail call spir_func <4 x i64> @_Z19sub_group_broadcastDv4_mj(<4 x i64> %11, i32 0) + %13 = shufflevector <4 x i64> %12, <4 x i64> undef, <16 x i32> + %14 = shufflevector <16 x i64> %13, <16 x i64> %10, <16 x i32> + %15 = shufflevector <16 x i64> %14, <16 x i64> undef, <8 x i32> + %16 = tail call spir_func <8 x i64> @_Z19sub_group_broadcastDv8_mj(<8 x i64> %15, i32 0) + %17 = shufflevector <8 x i64> %16, <8 x i64> undef, <16 x i32> + %18 = shufflevector <16 x i64> %17, <16 x i64> %14, <16 x i32> + %19 = tail call spir_func <16 x i64> @_Z19sub_group_broadcastDv16_mj(<16 x i64> %18, i32 0) + ret void +} + +declare dso_local spir_func i64 @_Z19sub_group_broadcastmj(i64, i32) local_unnamed_addr + +declare dso_local spir_func <2 x i64> @_Z19sub_group_broadcastDv2_mj(<2 x i64>, i32) local_unnamed_addr + +declare dso_local spir_func <3 x i64> @_Z19sub_group_broadcastDv3_mj(<3 x i64>, i32) local_unnamed_addr + +declare dso_local spir_func <4 x i64> @_Z19sub_group_broadcastDv4_mj(<4 x i64>, i32) local_unnamed_addr + +declare dso_local spir_func <8 x i64> @_Z19sub_group_broadcastDv8_mj(<8 x i64>, i32) local_unnamed_addr + +declare dso_local spir_func <16 x i64> @_Z19sub_group_broadcastDv16_mj(<16 x i64>, i32) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#float]] %[[#ScopeSubgroup]] %[[#float_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#float2_0:]] = OpVectorShuffle %[[#float2]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#float2]] %[[#ScopeSubgroup]] %[[#float2_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#float3_0:]] = OpVectorShuffle %[[#float3]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#float3]] %[[#ScopeSubgroup]] %[[#float3_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#float4_0:]] = OpVectorShuffle %[[#float4]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#float4]] %[[#ScopeSubgroup]] %[[#float4_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#float8_0:]] = OpVectorShuffle %[[#float8]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#float8]] %[[#ScopeSubgroup]] %[[#float8_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#float16]] +; CHECK-SPIRV: %[[#float16_0:]] = OpVectorShuffle %[[#float16]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#float16]] %[[#ScopeSubgroup]] %[[#float16_0]] %[[#int_0]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testBroadcastFloat() local_unnamed_addr { + %1 = tail call spir_func float @_Z19sub_group_broadcastfj(float 0.000000e+00, i32 0) + %2 = insertelement <16 x float> , float %1, i64 0 + %3 = shufflevector <16 x float> %2, <16 x float> undef, <2 x i32> + %4 = tail call spir_func <2 x float> @_Z19sub_group_broadcastDv2_fj(<2 x float> %3, i32 0) + %5 = shufflevector <2 x float> %4, <2 x float> undef, <16 x i32> + %6 = shufflevector <16 x 
float> %5, <16 x float> %2, <16 x i32> + %7 = shufflevector <16 x float> %6, <16 x float> undef, <3 x i32> + %8 = tail call spir_func <3 x float> @_Z19sub_group_broadcastDv3_fj(<3 x float> %7, i32 0) + %9 = shufflevector <3 x float> %8, <3 x float> undef, <16 x i32> + %10 = shufflevector <16 x float> %9, <16 x float> %6, <16 x i32> + %11 = shufflevector <16 x float> %10, <16 x float> undef, <4 x i32> + %12 = tail call spir_func <4 x float> @_Z19sub_group_broadcastDv4_fj(<4 x float> %11, i32 0) + %13 = shufflevector <4 x float> %12, <4 x float> undef, <16 x i32> + %14 = shufflevector <16 x float> %13, <16 x float> %10, <16 x i32> + %15 = shufflevector <16 x float> %14, <16 x float> undef, <8 x i32> + %16 = tail call spir_func <8 x float> @_Z19sub_group_broadcastDv8_fj(<8 x float> %15, i32 0) + %17 = shufflevector <8 x float> %16, <8 x float> undef, <16 x i32> + %18 = shufflevector <16 x float> %17, <16 x float> %14, <16 x i32> + %19 = tail call spir_func <16 x float> @_Z19sub_group_broadcastDv16_fj(<16 x float> %18, i32 0) + ret void +} + +declare dso_local spir_func float @_Z19sub_group_broadcastfj(float, i32) local_unnamed_addr + +declare dso_local spir_func <2 x float> @_Z19sub_group_broadcastDv2_fj(<2 x float>, i32) local_unnamed_addr + +declare dso_local spir_func <3 x float> @_Z19sub_group_broadcastDv3_fj(<3 x float>, i32) local_unnamed_addr + +declare dso_local spir_func <4 x float> @_Z19sub_group_broadcastDv4_fj(<4 x float>, i32) local_unnamed_addr + +declare dso_local spir_func <8 x float> @_Z19sub_group_broadcastDv8_fj(<8 x float>, i32) local_unnamed_addr + +declare dso_local spir_func <16 x float> @_Z19sub_group_broadcastDv16_fj(<16 x float>, i32) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#half]] %[[#ScopeSubgroup]] %[[#half_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#half2_0:]] = OpVectorShuffle %[[#half2]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#half2]] %[[#ScopeSubgroup]] %[[#half2_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#half3_0:]] = OpVectorShuffle %[[#half3]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#half3]] %[[#ScopeSubgroup]] %[[#half3_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#half4_0:]] = OpVectorShuffle %[[#half4]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#half4]] %[[#ScopeSubgroup]] %[[#half4_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#half8_0:]] = OpVectorShuffle %[[#half8]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#half8]] %[[#ScopeSubgroup]] %[[#half8_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#half16]] +; CHECK-SPIRV: %[[#half16_0:]] = OpVectorShuffle %[[#half16]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#half16]] %[[#ScopeSubgroup]] %[[#half16_0]] %[[#int_0]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testBroadcastHalf() local_unnamed_addr { + %1 = tail call spir_func half @_Z19sub_group_broadcastDhj(half 0xH0000, i32 0) + %2 = insertelement <16 x half> , half %1, i64 0 + %3 = shufflevector <16 x half> %2, <16 x half> undef, <2 x i32> + %4 = tail call spir_func <2 x half> @_Z19sub_group_broadcastDv2_Dhj(<2 x half> %3, i32 0) + %5 = shufflevector <2 x half> %4, <2 x half> undef, <16 x i32> + %6 = shufflevector <16 x half> %5, <16 x half> %2, <16 x i32> + %7 = shufflevector <16 x half> %6, <16 x half> undef, <3 x i32> + %8 = tail call spir_func <3 x half> @_Z19sub_group_broadcastDv3_Dhj(<3 x half> %7, i32 0) + %9 = shufflevector <3 x half> %8, <3 x half> undef, <16 x i32> + %10 = shufflevector <16 x half> %9, <16 x half> %6, <16 x i32> + %11 = shufflevector <16 x half> %10, <16 x 
half> undef, <4 x i32> + %12 = tail call spir_func <4 x half> @_Z19sub_group_broadcastDv4_Dhj(<4 x half> %11, i32 0) + %13 = shufflevector <4 x half> %12, <4 x half> undef, <16 x i32> + %14 = shufflevector <16 x half> %13, <16 x half> %10, <16 x i32> + %15 = shufflevector <16 x half> %14, <16 x half> undef, <8 x i32> + %16 = tail call spir_func <8 x half> @_Z19sub_group_broadcastDv8_Dhj(<8 x half> %15, i32 0) + %17 = shufflevector <8 x half> %16, <8 x half> undef, <16 x i32> + %18 = shufflevector <16 x half> %17, <16 x half> %14, <16 x i32> + %19 = tail call spir_func <16 x half> @_Z19sub_group_broadcastDv16_Dhj(<16 x half> %18, i32 0) + ret void +} + +declare dso_local spir_func half @_Z19sub_group_broadcastDhj(half, i32) local_unnamed_addr + +declare dso_local spir_func <2 x half> @_Z19sub_group_broadcastDv2_Dhj(<2 x half>, i32) local_unnamed_addr + +declare dso_local spir_func <3 x half> @_Z19sub_group_broadcastDv3_Dhj(<3 x half>, i32) local_unnamed_addr + +declare dso_local spir_func <4 x half> @_Z19sub_group_broadcastDv4_Dhj(<4 x half>, i32) local_unnamed_addr + +declare dso_local spir_func <8 x half> @_Z19sub_group_broadcastDv8_Dhj(<8 x half>, i32) local_unnamed_addr + +declare dso_local spir_func <16 x half> @_Z19sub_group_broadcastDv16_Dhj(<16 x half>, i32) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#double]] %[[#ScopeSubgroup]] %[[#double_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#double2_0:]] = OpVectorShuffle %[[#double2]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#double2]] %[[#ScopeSubgroup]] %[[#double2_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#double3_0:]] = OpVectorShuffle %[[#double3]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#double3]] %[[#ScopeSubgroup]] %[[#double3_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#double4_0:]] = OpVectorShuffle %[[#double4]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#double4]] %[[#ScopeSubgroup]] %[[#double4_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#double8_0:]] = OpVectorShuffle %[[#double8]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#double8]] %[[#ScopeSubgroup]] %[[#double8_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#double16]] +; CHECK-SPIRV: %[[#double16_0:]] = OpVectorShuffle %[[#double16]] +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#double16]] %[[#ScopeSubgroup]] %[[#double16_0]] %[[#int_0]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testBroadcastDouble() local_unnamed_addr { + %1 = tail call spir_func double @_Z19sub_group_broadcastdj(double 0.000000e+00, i32 0) + %2 = insertelement <16 x double> , double %1, i64 0 + %3 = shufflevector <16 x double> %2, <16 x double> undef, <2 x i32> + %4 = tail call spir_func <2 x double> @_Z19sub_group_broadcastDv2_dj(<2 x double> %3, i32 0) + %5 = shufflevector <2 x double> %4, <2 x double> undef, <16 x i32> + %6 = shufflevector <16 x double> %5, <16 x double> %2, <16 x i32> + %7 = shufflevector <16 x double> %6, <16 x double> undef, <3 x i32> + %8 = tail call spir_func <3 x double> @_Z19sub_group_broadcastDv3_dj(<3 x double> %7, i32 0) + %9 = shufflevector <3 x double> %8, <3 x double> undef, <16 x i32> + %10 = shufflevector <16 x double> %9, <16 x double> %6, <16 x i32> + %11 = shufflevector <16 x double> %10, <16 x double> undef, <4 x i32> + %12 = tail call spir_func <4 x double> @_Z19sub_group_broadcastDv4_dj(<4 x double> %11, i32 0) + %13 = shufflevector <4 x double> %12, <4 x double> undef, <16 x i32> + %14 = shufflevector <16 x double> %13, <16 x double> %10, <16 x i32> + %15 = shufflevector <16 x double> %14, 
<16 x double> undef, <8 x i32> + %16 = tail call spir_func <8 x double> @_Z19sub_group_broadcastDv8_dj(<8 x double> %15, i32 0) + %17 = shufflevector <8 x double> %16, <8 x double> undef, <16 x i32> + %18 = shufflevector <16 x double> %17, <16 x double> %14, <16 x i32> + %19 = tail call spir_func <16 x double> @_Z19sub_group_broadcastDv16_dj(<16 x double> %18, i32 0) + ret void +} + +declare dso_local spir_func double @_Z19sub_group_broadcastdj(double, i32) local_unnamed_addr + +declare dso_local spir_func <2 x double> @_Z19sub_group_broadcastDv2_dj(<2 x double>, i32) local_unnamed_addr + +declare dso_local spir_func <3 x double> @_Z19sub_group_broadcastDv3_dj(<3 x double>, i32) local_unnamed_addr + +declare dso_local spir_func <4 x double> @_Z19sub_group_broadcastDv4_dj(<4 x double>, i32) local_unnamed_addr + +declare dso_local spir_func <8 x double> @_Z19sub_group_broadcastDv8_dj(<8 x double>, i32) local_unnamed_addr + +declare dso_local spir_func <16 x double> @_Z19sub_group_broadcastDv16_dj(<16 x double>, i32) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupIAdd %[[#char]] %[[#ScopeSubgroup]] Reduce %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupSMin %[[#char]] %[[#ScopeSubgroup]] Reduce %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupSMax %[[#char]] %[[#ScopeSubgroup]] Reduce %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupIAdd %[[#char]] %[[#ScopeSubgroup]] InclusiveScan %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupSMin %[[#char]] %[[#ScopeSubgroup]] InclusiveScan %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupSMax %[[#char]] %[[#ScopeSubgroup]] InclusiveScan %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupIAdd %[[#char]] %[[#ScopeSubgroup]] ExclusiveScan %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupSMin %[[#char]] %[[#ScopeSubgroup]] ExclusiveScan %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupSMax %[[#char]] %[[#ScopeSubgroup]] ExclusiveScan %[[#char_0]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testReduceScanChar(i8 addrspace(1)* nocapture) local_unnamed_addr { + %2 = tail call spir_func signext i8 @_Z20sub_group_reduce_addc(i8 signext 0) + store i8 %2, i8 addrspace(1)* %0, align 1 + %3 = tail call spir_func signext i8 @_Z20sub_group_reduce_minc(i8 signext 0) + %4 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 1 + store i8 %3, i8 addrspace(1)* %4, align 1 + %5 = tail call spir_func signext i8 @_Z20sub_group_reduce_maxc(i8 signext 0) + %6 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 2 + store i8 %5, i8 addrspace(1)* %6, align 1 + %7 = tail call spir_func signext i8 @_Z28sub_group_scan_inclusive_addc(i8 signext 0) + %8 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 3 + store i8 %7, i8 addrspace(1)* %8, align 1 + %9 = tail call spir_func signext i8 @_Z28sub_group_scan_inclusive_minc(i8 signext 0) + %10 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 4 + store i8 %9, i8 addrspace(1)* %10, align 1 + %11 = tail call spir_func signext i8 @_Z28sub_group_scan_inclusive_maxc(i8 signext 0) + %12 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 5 + store i8 %11, i8 addrspace(1)* %12, align 1 + %13 = tail call spir_func signext i8 @_Z28sub_group_scan_exclusive_addc(i8 signext 0) + %14 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 6 + store i8 %13, i8 addrspace(1)* %14, align 1 + %15 = tail call spir_func signext i8 @_Z28sub_group_scan_exclusive_minc(i8 signext 0) + %16 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 7 + store i8 %15, i8 addrspace(1)* %16, align 1 + %17 = tail call spir_func signext i8 
@_Z28sub_group_scan_exclusive_maxc(i8 signext 0) + %18 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 8 + store i8 %17, i8 addrspace(1)* %18, align 1 + ret void +} + +declare dso_local spir_func signext i8 @_Z20sub_group_reduce_addc(i8 signext) local_unnamed_addr + +declare dso_local spir_func signext i8 @_Z20sub_group_reduce_minc(i8 signext) local_unnamed_addr + +declare dso_local spir_func signext i8 @_Z20sub_group_reduce_maxc(i8 signext) local_unnamed_addr + +declare dso_local spir_func signext i8 @_Z28sub_group_scan_inclusive_addc(i8 signext) local_unnamed_addr + +declare dso_local spir_func signext i8 @_Z28sub_group_scan_inclusive_minc(i8 signext) local_unnamed_addr + +declare dso_local spir_func signext i8 @_Z28sub_group_scan_inclusive_maxc(i8 signext) local_unnamed_addr + +declare dso_local spir_func signext i8 @_Z28sub_group_scan_exclusive_addc(i8 signext) local_unnamed_addr + +declare dso_local spir_func signext i8 @_Z28sub_group_scan_exclusive_minc(i8 signext) local_unnamed_addr + +declare dso_local spir_func signext i8 @_Z28sub_group_scan_exclusive_maxc(i8 signext) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupIAdd %[[#char]] %[[#ScopeSubgroup]] Reduce %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupUMin %[[#char]] %[[#ScopeSubgroup]] Reduce %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupUMax %[[#char]] %[[#ScopeSubgroup]] Reduce %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupIAdd %[[#char]] %[[#ScopeSubgroup]] InclusiveScan %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupUMin %[[#char]] %[[#ScopeSubgroup]] InclusiveScan %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupUMax %[[#char]] %[[#ScopeSubgroup]] InclusiveScan %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupIAdd %[[#char]] %[[#ScopeSubgroup]] ExclusiveScan %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupUMin %[[#char]] %[[#ScopeSubgroup]] ExclusiveScan %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupUMax %[[#char]] %[[#ScopeSubgroup]] ExclusiveScan %[[#char_0]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testReduceScanUChar(i8 addrspace(1)* nocapture) local_unnamed_addr { + %2 = tail call spir_func zeroext i8 @_Z20sub_group_reduce_addh(i8 zeroext 0) + store i8 %2, i8 addrspace(1)* %0, align 1 + %3 = tail call spir_func zeroext i8 @_Z20sub_group_reduce_minh(i8 zeroext 0) + %4 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 1 + store i8 %3, i8 addrspace(1)* %4, align 1 + %5 = tail call spir_func zeroext i8 @_Z20sub_group_reduce_maxh(i8 zeroext 0) + %6 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 2 + store i8 %5, i8 addrspace(1)* %6, align 1 + %7 = tail call spir_func zeroext i8 @_Z28sub_group_scan_inclusive_addh(i8 zeroext 0) + %8 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 3 + store i8 %7, i8 addrspace(1)* %8, align 1 + %9 = tail call spir_func zeroext i8 @_Z28sub_group_scan_inclusive_minh(i8 zeroext 0) + %10 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 4 + store i8 %9, i8 addrspace(1)* %10, align 1 + %11 = tail call spir_func zeroext i8 @_Z28sub_group_scan_inclusive_maxh(i8 zeroext 0) + %12 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 5 + store i8 %11, i8 addrspace(1)* %12, align 1 + %13 = tail call spir_func zeroext i8 @_Z28sub_group_scan_exclusive_addh(i8 zeroext 0) + %14 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 6 + store i8 %13, i8 addrspace(1)* %14, align 1 + %15 = tail call spir_func zeroext i8 @_Z28sub_group_scan_exclusive_minh(i8 zeroext 0) + %16 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 7 + store i8 %15, 
i8 addrspace(1)* %16, align 1 + %17 = tail call spir_func zeroext i8 @_Z28sub_group_scan_exclusive_maxh(i8 zeroext 0) + %18 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 8 + store i8 %17, i8 addrspace(1)* %18, align 1 + ret void +} + +declare dso_local spir_func zeroext i8 @_Z20sub_group_reduce_addh(i8 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i8 @_Z20sub_group_reduce_minh(i8 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i8 @_Z20sub_group_reduce_maxh(i8 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i8 @_Z28sub_group_scan_inclusive_addh(i8 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i8 @_Z28sub_group_scan_inclusive_minh(i8 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i8 @_Z28sub_group_scan_inclusive_maxh(i8 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i8 @_Z28sub_group_scan_exclusive_addh(i8 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i8 @_Z28sub_group_scan_exclusive_minh(i8 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i8 @_Z28sub_group_scan_exclusive_maxh(i8 zeroext) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupIAdd %[[#short]] %[[#ScopeSubgroup]] Reduce %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupSMin %[[#short]] %[[#ScopeSubgroup]] Reduce %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupSMax %[[#short]] %[[#ScopeSubgroup]] Reduce %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupIAdd %[[#short]] %[[#ScopeSubgroup]] InclusiveScan %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupSMin %[[#short]] %[[#ScopeSubgroup]] InclusiveScan %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupSMax %[[#short]] %[[#ScopeSubgroup]] InclusiveScan %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupIAdd %[[#short]] %[[#ScopeSubgroup]] ExclusiveScan %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupSMin %[[#short]] %[[#ScopeSubgroup]] ExclusiveScan %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupSMax %[[#short]] %[[#ScopeSubgroup]] ExclusiveScan %[[#short_0]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testReduceScanShort(i16 addrspace(1)* nocapture) local_unnamed_addr { + %2 = tail call spir_func signext i16 @_Z20sub_group_reduce_adds(i16 signext 0) + store i16 %2, i16 addrspace(1)* %0, align 2 + %3 = tail call spir_func signext i16 @_Z20sub_group_reduce_mins(i16 signext 0) + %4 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 1 + store i16 %3, i16 addrspace(1)* %4, align 2 + %5 = tail call spir_func signext i16 @_Z20sub_group_reduce_maxs(i16 signext 0) + %6 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 2 + store i16 %5, i16 addrspace(1)* %6, align 2 + %7 = tail call spir_func signext i16 @_Z28sub_group_scan_inclusive_adds(i16 signext 0) + %8 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 3 + store i16 %7, i16 addrspace(1)* %8, align 2 + %9 = tail call spir_func signext i16 @_Z28sub_group_scan_inclusive_mins(i16 signext 0) + %10 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 4 + store i16 %9, i16 addrspace(1)* %10, align 2 + %11 = tail call spir_func signext i16 @_Z28sub_group_scan_inclusive_maxs(i16 signext 0) + %12 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 5 + store i16 %11, i16 addrspace(1)* %12, align 2 + %13 = tail call spir_func signext i16 @_Z28sub_group_scan_exclusive_adds(i16 signext 0) + %14 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 6 + store i16 %13, i16 addrspace(1)* %14, align 2 + %15 = tail call spir_func signext 
i16 @_Z28sub_group_scan_exclusive_mins(i16 signext 0) + %16 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 7 + store i16 %15, i16 addrspace(1)* %16, align 2 + %17 = tail call spir_func signext i16 @_Z28sub_group_scan_exclusive_maxs(i16 signext 0) + %18 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 8 + store i16 %17, i16 addrspace(1)* %18, align 2 + ret void +} + +declare dso_local spir_func signext i16 @_Z20sub_group_reduce_adds(i16 signext) local_unnamed_addr + +declare dso_local spir_func signext i16 @_Z20sub_group_reduce_mins(i16 signext) local_unnamed_addr + +declare dso_local spir_func signext i16 @_Z20sub_group_reduce_maxs(i16 signext) local_unnamed_addr + +declare dso_local spir_func signext i16 @_Z28sub_group_scan_inclusive_adds(i16 signext) local_unnamed_addr + +declare dso_local spir_func signext i16 @_Z28sub_group_scan_inclusive_mins(i16 signext) local_unnamed_addr + +declare dso_local spir_func signext i16 @_Z28sub_group_scan_inclusive_maxs(i16 signext) local_unnamed_addr + +declare dso_local spir_func signext i16 @_Z28sub_group_scan_exclusive_adds(i16 signext) local_unnamed_addr + +declare dso_local spir_func signext i16 @_Z28sub_group_scan_exclusive_mins(i16 signext) local_unnamed_addr + +declare dso_local spir_func signext i16 @_Z28sub_group_scan_exclusive_maxs(i16 signext) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupIAdd %[[#short]] %[[#ScopeSubgroup]] Reduce %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupUMin %[[#short]] %[[#ScopeSubgroup]] Reduce %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupUMax %[[#short]] %[[#ScopeSubgroup]] Reduce %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupIAdd %[[#short]] %[[#ScopeSubgroup]] InclusiveScan %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupUMin %[[#short]] %[[#ScopeSubgroup]] InclusiveScan %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupUMax %[[#short]] %[[#ScopeSubgroup]] InclusiveScan %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupIAdd %[[#short]] %[[#ScopeSubgroup]] ExclusiveScan %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupUMin %[[#short]] %[[#ScopeSubgroup]] ExclusiveScan %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupUMax %[[#short]] %[[#ScopeSubgroup]] ExclusiveScan %[[#short_0]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testReduceScanUShort(i16 addrspace(1)* nocapture) local_unnamed_addr { + %2 = tail call spir_func zeroext i16 @_Z20sub_group_reduce_addt(i16 zeroext 0) + store i16 %2, i16 addrspace(1)* %0, align 2 + %3 = tail call spir_func zeroext i16 @_Z20sub_group_reduce_mint(i16 zeroext 0) + %4 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 1 + store i16 %3, i16 addrspace(1)* %4, align 2 + %5 = tail call spir_func zeroext i16 @_Z20sub_group_reduce_maxt(i16 zeroext 0) + %6 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 2 + store i16 %5, i16 addrspace(1)* %6, align 2 + %7 = tail call spir_func zeroext i16 @_Z28sub_group_scan_inclusive_addt(i16 zeroext 0) + %8 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 3 + store i16 %7, i16 addrspace(1)* %8, align 2 + %9 = tail call spir_func zeroext i16 @_Z28sub_group_scan_inclusive_mint(i16 zeroext 0) + %10 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 4 + store i16 %9, i16 addrspace(1)* %10, align 2 + %11 = tail call spir_func zeroext i16 @_Z28sub_group_scan_inclusive_maxt(i16 zeroext 0) + %12 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 5 + store i16 %11, i16 addrspace(1)* %12, align 2 + %13 = tail call spir_func zeroext i16 @_Z28sub_group_scan_exclusive_addt(i16 
zeroext 0) + %14 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 6 + store i16 %13, i16 addrspace(1)* %14, align 2 + %15 = tail call spir_func zeroext i16 @_Z28sub_group_scan_exclusive_mint(i16 zeroext 0) + %16 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 7 + store i16 %15, i16 addrspace(1)* %16, align 2 + %17 = tail call spir_func zeroext i16 @_Z28sub_group_scan_exclusive_maxt(i16 zeroext 0) + %18 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 8 + store i16 %17, i16 addrspace(1)* %18, align 2 + ret void +} + +declare dso_local spir_func zeroext i16 @_Z20sub_group_reduce_addt(i16 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i16 @_Z20sub_group_reduce_mint(i16 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i16 @_Z20sub_group_reduce_maxt(i16 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i16 @_Z28sub_group_scan_inclusive_addt(i16 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i16 @_Z28sub_group_scan_inclusive_mint(i16 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i16 @_Z28sub_group_scan_inclusive_maxt(i16 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i16 @_Z28sub_group_scan_exclusive_addt(i16 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i16 @_Z28sub_group_scan_exclusive_mint(i16 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i16 @_Z28sub_group_scan_exclusive_maxt(i16 zeroext) local_unnamed_addr diff --git a/llvm/test/CodeGen/SPIRV/transcoding/sub_group_non_uniform_arithmetic.ll b/llvm/test/CodeGen/SPIRV/transcoding/sub_group_non_uniform_arithmetic.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/transcoding/sub_group_non_uniform_arithmetic.ll @@ -0,0 +1,1737 @@ +;; #pragma OPENCL EXTENSION cl_khr_subgroup_non_uniform_arithmetic : enable +;; #pragma OPENCL EXTENSION cl_khr_fp16 : enable +;; #pragma OPENCL EXTENSION cl_khr_fp64 : enable +;; +;; kernel void testNonUniformArithmeticChar(global char* dst) +;; { +;; char v = 0; +;; dst[0] = sub_group_non_uniform_reduce_add(v); +;; dst[1] = sub_group_non_uniform_reduce_mul(v); +;; dst[2] = sub_group_non_uniform_reduce_min(v); +;; dst[3] = sub_group_non_uniform_reduce_max(v); +;; dst[4] = sub_group_non_uniform_scan_inclusive_add(v); +;; dst[5] = sub_group_non_uniform_scan_inclusive_mul(v); +;; dst[6] = sub_group_non_uniform_scan_inclusive_min(v); +;; dst[7] = sub_group_non_uniform_scan_inclusive_max(v); +;; dst[8] = sub_group_non_uniform_scan_exclusive_add(v); +;; dst[9] = sub_group_non_uniform_scan_exclusive_mul(v); +;; dst[10] = sub_group_non_uniform_scan_exclusive_min(v); +;; dst[11] = sub_group_non_uniform_scan_exclusive_max(v); +;; } +;; +;; kernel void testNonUniformArithmeticUChar(global uchar* dst) +;; { +;; uchar v = 0; +;; dst[0] = sub_group_non_uniform_reduce_add(v); +;; dst[1] = sub_group_non_uniform_reduce_mul(v); +;; dst[2] = sub_group_non_uniform_reduce_min(v); +;; dst[3] = sub_group_non_uniform_reduce_max(v); +;; dst[4] = sub_group_non_uniform_scan_inclusive_add(v); +;; dst[5] = sub_group_non_uniform_scan_inclusive_mul(v); +;; dst[6] = sub_group_non_uniform_scan_inclusive_min(v); +;; dst[7] = sub_group_non_uniform_scan_inclusive_max(v); +;; dst[8] = sub_group_non_uniform_scan_exclusive_add(v); +;; dst[9] = sub_group_non_uniform_scan_exclusive_mul(v); +;; dst[10] = sub_group_non_uniform_scan_exclusive_min(v); +;; dst[11] = sub_group_non_uniform_scan_exclusive_max(v); +;; } +;; +;; kernel void 
testNonUniformArithmeticShort(global short* dst) +;; { +;; short v = 0; +;; dst[0] = sub_group_non_uniform_reduce_add(v); +;; dst[1] = sub_group_non_uniform_reduce_mul(v); +;; dst[2] = sub_group_non_uniform_reduce_min(v); +;; dst[3] = sub_group_non_uniform_reduce_max(v); +;; dst[4] = sub_group_non_uniform_scan_inclusive_add(v); +;; dst[5] = sub_group_non_uniform_scan_inclusive_mul(v); +;; dst[6] = sub_group_non_uniform_scan_inclusive_min(v); +;; dst[7] = sub_group_non_uniform_scan_inclusive_max(v); +;; dst[8] = sub_group_non_uniform_scan_exclusive_add(v); +;; dst[9] = sub_group_non_uniform_scan_exclusive_mul(v); +;; dst[10] = sub_group_non_uniform_scan_exclusive_min(v); +;; dst[11] = sub_group_non_uniform_scan_exclusive_max(v); +;; } +;; +;; kernel void testNonUniformArithmeticUShort(global ushort* dst) +;; { +;; ushort v = 0; +;; dst[0] = sub_group_non_uniform_reduce_add(v); +;; dst[1] = sub_group_non_uniform_reduce_mul(v); +;; dst[2] = sub_group_non_uniform_reduce_min(v); +;; dst[3] = sub_group_non_uniform_reduce_max(v); +;; dst[4] = sub_group_non_uniform_scan_inclusive_add(v); +;; dst[5] = sub_group_non_uniform_scan_inclusive_mul(v); +;; dst[6] = sub_group_non_uniform_scan_inclusive_min(v); +;; dst[7] = sub_group_non_uniform_scan_inclusive_max(v); +;; dst[8] = sub_group_non_uniform_scan_exclusive_add(v); +;; dst[9] = sub_group_non_uniform_scan_exclusive_mul(v); +;; dst[10] = sub_group_non_uniform_scan_exclusive_min(v); +;; dst[11] = sub_group_non_uniform_scan_exclusive_max(v); +;; } +;; +;; kernel void testNonUniformArithmeticInt(global int* dst) +;; { +;; int v = 0; +;; dst[0] = sub_group_non_uniform_reduce_add(v); +;; dst[1] = sub_group_non_uniform_reduce_mul(v); +;; dst[2] = sub_group_non_uniform_reduce_min(v); +;; dst[3] = sub_group_non_uniform_reduce_max(v); +;; dst[4] = sub_group_non_uniform_scan_inclusive_add(v); +;; dst[5] = sub_group_non_uniform_scan_inclusive_mul(v); +;; dst[6] = sub_group_non_uniform_scan_inclusive_min(v); +;; dst[7] = sub_group_non_uniform_scan_inclusive_max(v); +;; dst[8] = sub_group_non_uniform_scan_exclusive_add(v); +;; dst[9] = sub_group_non_uniform_scan_exclusive_mul(v); +;; dst[10] = sub_group_non_uniform_scan_exclusive_min(v); +;; dst[11] = sub_group_non_uniform_scan_exclusive_max(v); +;; } +;; +;; kernel void testNonUniformArithmeticUInt(global uint* dst) +;; { +;; uint v = 0; +;; dst[0] = sub_group_non_uniform_reduce_add(v); +;; dst[1] = sub_group_non_uniform_reduce_mul(v); +;; dst[2] = sub_group_non_uniform_reduce_min(v); +;; dst[3] = sub_group_non_uniform_reduce_max(v); +;; dst[4] = sub_group_non_uniform_scan_inclusive_add(v); +;; dst[5] = sub_group_non_uniform_scan_inclusive_mul(v); +;; dst[6] = sub_group_non_uniform_scan_inclusive_min(v); +;; dst[7] = sub_group_non_uniform_scan_inclusive_max(v); +;; dst[8] = sub_group_non_uniform_scan_exclusive_add(v); +;; dst[9] = sub_group_non_uniform_scan_exclusive_mul(v); +;; dst[10] = sub_group_non_uniform_scan_exclusive_min(v); +;; dst[11] = sub_group_non_uniform_scan_exclusive_max(v); +;; } +;; +;; kernel void testNonUniformArithmeticLong(global long* dst) +;; { +;; long v = 0; +;; dst[0] = sub_group_non_uniform_reduce_add(v); +;; dst[1] = sub_group_non_uniform_reduce_mul(v); +;; dst[2] = sub_group_non_uniform_reduce_min(v); +;; dst[3] = sub_group_non_uniform_reduce_max(v); +;; dst[4] = sub_group_non_uniform_scan_inclusive_add(v); +;; dst[5] = sub_group_non_uniform_scan_inclusive_mul(v); +;; dst[6] = sub_group_non_uniform_scan_inclusive_min(v); +;; dst[7] = sub_group_non_uniform_scan_inclusive_max(v); +;; 
dst[8] = sub_group_non_uniform_scan_exclusive_add(v); +;; dst[9] = sub_group_non_uniform_scan_exclusive_mul(v); +;; dst[10] = sub_group_non_uniform_scan_exclusive_min(v); +;; dst[11] = sub_group_non_uniform_scan_exclusive_max(v); +;; } +;; +;; kernel void testNonUniformArithmeticULong(global ulong* dst) +;; { +;; ulong v = 0; +;; dst[0] = sub_group_non_uniform_reduce_add(v); +;; dst[1] = sub_group_non_uniform_reduce_mul(v); +;; dst[2] = sub_group_non_uniform_reduce_min(v); +;; dst[3] = sub_group_non_uniform_reduce_max(v); +;; dst[4] = sub_group_non_uniform_scan_inclusive_add(v); +;; dst[5] = sub_group_non_uniform_scan_inclusive_mul(v); +;; dst[6] = sub_group_non_uniform_scan_inclusive_min(v); +;; dst[7] = sub_group_non_uniform_scan_inclusive_max(v); +;; dst[8] = sub_group_non_uniform_scan_exclusive_add(v); +;; dst[9] = sub_group_non_uniform_scan_exclusive_mul(v); +;; dst[10] = sub_group_non_uniform_scan_exclusive_min(v); +;; dst[11] = sub_group_non_uniform_scan_exclusive_max(v); +;; } +;; +;; kernel void testNonUniformArithmeticFloat(global float* dst) +;; { +;; float v = 0; +;; dst[0] = sub_group_non_uniform_reduce_add(v); +;; dst[1] = sub_group_non_uniform_reduce_mul(v); +;; dst[2] = sub_group_non_uniform_reduce_min(v); +;; dst[3] = sub_group_non_uniform_reduce_max(v); +;; dst[4] = sub_group_non_uniform_scan_inclusive_add(v); +;; dst[5] = sub_group_non_uniform_scan_inclusive_mul(v); +;; dst[6] = sub_group_non_uniform_scan_inclusive_min(v); +;; dst[7] = sub_group_non_uniform_scan_inclusive_max(v); +;; dst[8] = sub_group_non_uniform_scan_exclusive_add(v); +;; dst[9] = sub_group_non_uniform_scan_exclusive_mul(v); +;; dst[10] = sub_group_non_uniform_scan_exclusive_min(v); +;; dst[11] = sub_group_non_uniform_scan_exclusive_max(v); +;; } +;; +;; kernel void testNonUniformArithmeticHalf(global half* dst) +;; { +;; half v = 0; +;; dst[0] = sub_group_non_uniform_reduce_add(v); +;; dst[1] = sub_group_non_uniform_reduce_mul(v); +;; dst[2] = sub_group_non_uniform_reduce_min(v); +;; dst[3] = sub_group_non_uniform_reduce_max(v); +;; dst[4] = sub_group_non_uniform_scan_inclusive_add(v); +;; dst[5] = sub_group_non_uniform_scan_inclusive_mul(v); +;; dst[6] = sub_group_non_uniform_scan_inclusive_min(v); +;; dst[7] = sub_group_non_uniform_scan_inclusive_max(v); +;; dst[8] = sub_group_non_uniform_scan_exclusive_add(v); +;; dst[9] = sub_group_non_uniform_scan_exclusive_mul(v); +;; dst[10] = sub_group_non_uniform_scan_exclusive_min(v); +;; dst[11] = sub_group_non_uniform_scan_exclusive_max(v); +;; } +;; +;; kernel void testNonUniformArithmeticDouble(global double* dst) +;; { +;; double v = 0; +;; dst[0] = sub_group_non_uniform_reduce_add(v); +;; dst[1] = sub_group_non_uniform_reduce_mul(v); +;; dst[2] = sub_group_non_uniform_reduce_min(v); +;; dst[3] = sub_group_non_uniform_reduce_max(v); +;; dst[4] = sub_group_non_uniform_scan_inclusive_add(v); +;; dst[5] = sub_group_non_uniform_scan_inclusive_mul(v); +;; dst[6] = sub_group_non_uniform_scan_inclusive_min(v); +;; dst[7] = sub_group_non_uniform_scan_inclusive_max(v); +;; dst[8] = sub_group_non_uniform_scan_exclusive_add(v); +;; dst[9] = sub_group_non_uniform_scan_exclusive_mul(v); +;; dst[10] = sub_group_non_uniform_scan_exclusive_min(v); +;; dst[11] = sub_group_non_uniform_scan_exclusive_max(v); +;; } +;; +;; kernel void testNonUniformBitwiseChar(global char* dst) +;; { +;; char v = 0; +;; dst[0] = sub_group_non_uniform_reduce_and(v); +;; dst[1] = sub_group_non_uniform_reduce_or(v); +;; dst[2] = sub_group_non_uniform_reduce_xor(v); +;; dst[3] = 
sub_group_non_uniform_scan_inclusive_and(v); +;; dst[4] = sub_group_non_uniform_scan_inclusive_or(v); +;; dst[5] = sub_group_non_uniform_scan_inclusive_xor(v); +;; dst[6] = sub_group_non_uniform_scan_exclusive_and(v); +;; dst[7] = sub_group_non_uniform_scan_exclusive_or(v); +;; dst[8] = sub_group_non_uniform_scan_exclusive_xor(v); +;; } +;; +;; kernel void testNonUniformBitwiseUChar(global uchar* dst) +;; { +;; uchar v = 0; +;; dst[0] = sub_group_non_uniform_reduce_and(v); +;; dst[1] = sub_group_non_uniform_reduce_or(v); +;; dst[2] = sub_group_non_uniform_reduce_xor(v); +;; dst[3] = sub_group_non_uniform_scan_inclusive_and(v); +;; dst[4] = sub_group_non_uniform_scan_inclusive_or(v); +;; dst[5] = sub_group_non_uniform_scan_inclusive_xor(v); +;; dst[6] = sub_group_non_uniform_scan_exclusive_and(v); +;; dst[7] = sub_group_non_uniform_scan_exclusive_or(v); +;; dst[8] = sub_group_non_uniform_scan_exclusive_xor(v); +;; } +;; +;; kernel void testNonUniformBitwiseShort(global short* dst) +;; { +;; short v = 0; +;; dst[0] = sub_group_non_uniform_reduce_and(v); +;; dst[1] = sub_group_non_uniform_reduce_or(v); +;; dst[2] = sub_group_non_uniform_reduce_xor(v); +;; dst[3] = sub_group_non_uniform_scan_inclusive_and(v); +;; dst[4] = sub_group_non_uniform_scan_inclusive_or(v); +;; dst[5] = sub_group_non_uniform_scan_inclusive_xor(v); +;; dst[6] = sub_group_non_uniform_scan_exclusive_and(v); +;; dst[7] = sub_group_non_uniform_scan_exclusive_or(v); +;; dst[8] = sub_group_non_uniform_scan_exclusive_xor(v); +;; } +;; +;; kernel void testNonUniformBitwiseUShort(global ushort* dst) +;; { +;; ushort v = 0; +;; dst[0] = sub_group_non_uniform_reduce_and(v); +;; dst[1] = sub_group_non_uniform_reduce_or(v); +;; dst[2] = sub_group_non_uniform_reduce_xor(v); +;; dst[3] = sub_group_non_uniform_scan_inclusive_and(v); +;; dst[4] = sub_group_non_uniform_scan_inclusive_or(v); +;; dst[5] = sub_group_non_uniform_scan_inclusive_xor(v); +;; dst[6] = sub_group_non_uniform_scan_exclusive_and(v); +;; dst[7] = sub_group_non_uniform_scan_exclusive_or(v); +;; dst[8] = sub_group_non_uniform_scan_exclusive_xor(v); +;; } +;; +;; kernel void testNonUniformBitwiseInt(global int* dst) +;; { +;; int v = 0; +;; dst[0] = sub_group_non_uniform_reduce_and(v); +;; dst[1] = sub_group_non_uniform_reduce_or(v); +;; dst[2] = sub_group_non_uniform_reduce_xor(v); +;; dst[3] = sub_group_non_uniform_scan_inclusive_and(v); +;; dst[4] = sub_group_non_uniform_scan_inclusive_or(v); +;; dst[5] = sub_group_non_uniform_scan_inclusive_xor(v); +;; dst[6] = sub_group_non_uniform_scan_exclusive_and(v); +;; dst[7] = sub_group_non_uniform_scan_exclusive_or(v); +;; dst[8] = sub_group_non_uniform_scan_exclusive_xor(v); +;; } +;; +;; kernel void testNonUniformBitwiseUInt(global uint* dst) +;; { +;; uint v = 0; +;; dst[0] = sub_group_non_uniform_reduce_and(v); +;; dst[1] = sub_group_non_uniform_reduce_or(v); +;; dst[2] = sub_group_non_uniform_reduce_xor(v); +;; dst[3] = sub_group_non_uniform_scan_inclusive_and(v); +;; dst[4] = sub_group_non_uniform_scan_inclusive_or(v); +;; dst[5] = sub_group_non_uniform_scan_inclusive_xor(v); +;; dst[6] = sub_group_non_uniform_scan_exclusive_and(v); +;; dst[7] = sub_group_non_uniform_scan_exclusive_or(v); +;; dst[8] = sub_group_non_uniform_scan_exclusive_xor(v); +;; } +;; +;; kernel void testNonUniformBitwiseLong(global long* dst) +;; { +;; long v = 0; +;; dst[0] = sub_group_non_uniform_reduce_and(v); +;; dst[1] = sub_group_non_uniform_reduce_or(v); +;; dst[2] = sub_group_non_uniform_reduce_xor(v); +;; dst[3] = 
sub_group_non_uniform_scan_inclusive_and(v); +;; dst[4] = sub_group_non_uniform_scan_inclusive_or(v); +;; dst[5] = sub_group_non_uniform_scan_inclusive_xor(v); +;; dst[6] = sub_group_non_uniform_scan_exclusive_and(v); +;; dst[7] = sub_group_non_uniform_scan_exclusive_or(v); +;; dst[8] = sub_group_non_uniform_scan_exclusive_xor(v); +;; } +;; +;; kernel void testNonUniformBitwiseULong(global ulong* dst) +;; { +;; ulong v = 0; +;; dst[0] = sub_group_non_uniform_reduce_and(v); +;; dst[1] = sub_group_non_uniform_reduce_or(v); +;; dst[2] = sub_group_non_uniform_reduce_xor(v); +;; dst[3] = sub_group_non_uniform_scan_inclusive_and(v); +;; dst[4] = sub_group_non_uniform_scan_inclusive_or(v); +;; dst[5] = sub_group_non_uniform_scan_inclusive_xor(v); +;; dst[6] = sub_group_non_uniform_scan_exclusive_and(v); +;; dst[7] = sub_group_non_uniform_scan_exclusive_or(v); +;; dst[8] = sub_group_non_uniform_scan_exclusive_xor(v); +;; } +;; +;; kernel void testNonUniformLogical(global int* dst) +;; { +;; int v = 0; +;; dst[0] = sub_group_non_uniform_reduce_logical_and(v); +;; dst[1] = sub_group_non_uniform_reduce_logical_or(v); +;; dst[2] = sub_group_non_uniform_reduce_logical_xor(v); +;; dst[3] = sub_group_non_uniform_scan_inclusive_logical_and(v); +;; dst[4] = sub_group_non_uniform_scan_inclusive_logical_or(v); +;; dst[5] = sub_group_non_uniform_scan_inclusive_logical_xor(v); +;; dst[6] = sub_group_non_uniform_scan_exclusive_logical_and(v); +;; dst[7] = sub_group_non_uniform_scan_exclusive_logical_or(v); +;; dst[8] = sub_group_non_uniform_scan_exclusive_logical_xor(v); +;; } + +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV + +; CHECK-SPIRV-DAG: OpCapability GroupNonUniformArithmetic + +; CHECK-SPIRV-DAG: %[[#bool:]] = OpTypeBool +; CHECK-SPIRV-DAG: %[[#char:]] = OpTypeInt 8 0 +; CHECK-SPIRV-DAG: %[[#short:]] = OpTypeInt 16 0 +; CHECK-SPIRV-DAG: %[[#int:]] = OpTypeInt 32 0 +; CHECK-SPIRV-DAG: %[[#long:]] = OpTypeInt 64 0 +; CHECK-SPIRV-DAG: %[[#half:]] = OpTypeFloat 16 +; CHECK-SPIRV-DAG: %[[#float:]] = OpTypeFloat 32 +; CHECK-SPIRV-DAG: %[[#double:]] = OpTypeFloat 64 + +; CHECK-SPIRV-DAG: %[[#false:]] = OpConstantFalse %[[#bool]] +; CHECK-SPIRV-DAG: %[[#ScopeSubgroup:]] = OpConstant %[[#int]] 3 +; CHECK-SPIRV-DAG: %[[#char_0:]] = OpConstant %[[#char]] 0 +; CHECK-SPIRV-DAG: %[[#short_0:]] = OpConstant %[[#short]] 0 +; CHECK-SPIRV-DAG: %[[#int_0:]] = OpConstant %[[#int]] 0 +; CHECK-SPIRV-DAG: %[[#long_0:]] = OpConstantNull %[[#long]] +; CHECK-SPIRV-DAG: %[[#half_0:]] = OpConstant %[[#half]] 0 +; CHECK-SPIRV-DAG: %[[#float_0:]] = OpConstant %[[#float]] 0 +; CHECK-SPIRV-DAG: %[[#double_0:]] = OpConstant %[[#double]] 0 + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#char]] %[[#ScopeSubgroup]] Reduce %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#char]] %[[#ScopeSubgroup]] Reduce %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMin %[[#char]] %[[#ScopeSubgroup]] Reduce %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMax %[[#char]] %[[#ScopeSubgroup]] Reduce %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#char]] %[[#ScopeSubgroup]] InclusiveScan %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#char]] %[[#ScopeSubgroup]] InclusiveScan %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMin %[[#char]] %[[#ScopeSubgroup]] InclusiveScan %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMax %[[#char]] %[[#ScopeSubgroup]] InclusiveScan %[[#char_0]] +; CHECK-SPIRV: 
%[[#]] = OpGroupNonUniformIAdd %[[#char]] %[[#ScopeSubgroup]] ExclusiveScan %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#char]] %[[#ScopeSubgroup]] ExclusiveScan %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMin %[[#char]] %[[#ScopeSubgroup]] ExclusiveScan %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMax %[[#char]] %[[#ScopeSubgroup]] ExclusiveScan %[[#char_0]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testNonUniformArithmeticChar(i8 addrspace(1)* nocapture) local_unnamed_addr { + %2 = tail call spir_func signext i8 @_Z32sub_group_non_uniform_reduce_addc(i8 signext 0) + store i8 %2, i8 addrspace(1)* %0, align 1 + %3 = tail call spir_func signext i8 @_Z32sub_group_non_uniform_reduce_mulc(i8 signext 0) + %4 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 1 + store i8 %3, i8 addrspace(1)* %4, align 1 + %5 = tail call spir_func signext i8 @_Z32sub_group_non_uniform_reduce_minc(i8 signext 0) + %6 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 2 + store i8 %5, i8 addrspace(1)* %6, align 1 + %7 = tail call spir_func signext i8 @_Z32sub_group_non_uniform_reduce_maxc(i8 signext 0) + %8 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 3 + store i8 %7, i8 addrspace(1)* %8, align 1 + %9 = tail call spir_func signext i8 @_Z40sub_group_non_uniform_scan_inclusive_addc(i8 signext 0) + %10 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 4 + store i8 %9, i8 addrspace(1)* %10, align 1 + %11 = tail call spir_func signext i8 @_Z40sub_group_non_uniform_scan_inclusive_mulc(i8 signext 0) + %12 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 5 + store i8 %11, i8 addrspace(1)* %12, align 1 + %13 = tail call spir_func signext i8 @_Z40sub_group_non_uniform_scan_inclusive_minc(i8 signext 0) + %14 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 6 + store i8 %13, i8 addrspace(1)* %14, align 1 + %15 = tail call spir_func signext i8 @_Z40sub_group_non_uniform_scan_inclusive_maxc(i8 signext 0) + %16 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 7 + store i8 %15, i8 addrspace(1)* %16, align 1 + %17 = tail call spir_func signext i8 @_Z40sub_group_non_uniform_scan_exclusive_addc(i8 signext 0) + %18 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 8 + store i8 %17, i8 addrspace(1)* %18, align 1 + %19 = tail call spir_func signext i8 @_Z40sub_group_non_uniform_scan_exclusive_mulc(i8 signext 0) + %20 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 9 + store i8 %19, i8 addrspace(1)* %20, align 1 + %21 = tail call spir_func signext i8 @_Z40sub_group_non_uniform_scan_exclusive_minc(i8 signext 0) + %22 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 10 + store i8 %21, i8 addrspace(1)* %22, align 1 + %23 = tail call spir_func signext i8 @_Z40sub_group_non_uniform_scan_exclusive_maxc(i8 signext 0) + %24 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 11 + store i8 %23, i8 addrspace(1)* %24, align 1 + ret void +} + +declare dso_local spir_func signext i8 @_Z32sub_group_non_uniform_reduce_addc(i8 signext) local_unnamed_addr + +declare dso_local spir_func signext i8 @_Z32sub_group_non_uniform_reduce_mulc(i8 signext) local_unnamed_addr + +declare dso_local spir_func signext i8 @_Z32sub_group_non_uniform_reduce_minc(i8 signext) local_unnamed_addr + +declare dso_local spir_func signext i8 @_Z32sub_group_non_uniform_reduce_maxc(i8 signext) local_unnamed_addr + +declare dso_local spir_func signext i8 @_Z40sub_group_non_uniform_scan_inclusive_addc(i8 signext) local_unnamed_addr + +declare dso_local spir_func 
signext i8 @_Z40sub_group_non_uniform_scan_inclusive_mulc(i8 signext) local_unnamed_addr + +declare dso_local spir_func signext i8 @_Z40sub_group_non_uniform_scan_inclusive_minc(i8 signext) local_unnamed_addr + +declare dso_local spir_func signext i8 @_Z40sub_group_non_uniform_scan_inclusive_maxc(i8 signext) local_unnamed_addr + +declare dso_local spir_func signext i8 @_Z40sub_group_non_uniform_scan_exclusive_addc(i8 signext) local_unnamed_addr + +declare dso_local spir_func signext i8 @_Z40sub_group_non_uniform_scan_exclusive_mulc(i8 signext) local_unnamed_addr + +declare dso_local spir_func signext i8 @_Z40sub_group_non_uniform_scan_exclusive_minc(i8 signext) local_unnamed_addr + +declare dso_local spir_func signext i8 @_Z40sub_group_non_uniform_scan_exclusive_maxc(i8 signext) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#char]] %[[#ScopeSubgroup]] Reduce %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#char]] %[[#ScopeSubgroup]] Reduce %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMin %[[#char]] %[[#ScopeSubgroup]] Reduce %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMax %[[#char]] %[[#ScopeSubgroup]] Reduce %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#char]] %[[#ScopeSubgroup]] InclusiveScan %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#char]] %[[#ScopeSubgroup]] InclusiveScan %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMin %[[#char]] %[[#ScopeSubgroup]] InclusiveScan %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMax %[[#char]] %[[#ScopeSubgroup]] InclusiveScan %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#char]] %[[#ScopeSubgroup]] ExclusiveScan %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#char]] %[[#ScopeSubgroup]] ExclusiveScan %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMin %[[#char]] %[[#ScopeSubgroup]] ExclusiveScan %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMax %[[#char]] %[[#ScopeSubgroup]] ExclusiveScan %[[#char_0]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testNonUniformArithmeticUChar(i8 addrspace(1)* nocapture) local_unnamed_addr { + %2 = tail call spir_func zeroext i8 @_Z32sub_group_non_uniform_reduce_addh(i8 zeroext 0) + store i8 %2, i8 addrspace(1)* %0, align 1 + %3 = tail call spir_func zeroext i8 @_Z32sub_group_non_uniform_reduce_mulh(i8 zeroext 0) + %4 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 1 + store i8 %3, i8 addrspace(1)* %4, align 1 + %5 = tail call spir_func zeroext i8 @_Z32sub_group_non_uniform_reduce_minh(i8 zeroext 0) + %6 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 2 + store i8 %5, i8 addrspace(1)* %6, align 1 + %7 = tail call spir_func zeroext i8 @_Z32sub_group_non_uniform_reduce_maxh(i8 zeroext 0) + %8 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 3 + store i8 %7, i8 addrspace(1)* %8, align 1 + %9 = tail call spir_func zeroext i8 @_Z40sub_group_non_uniform_scan_inclusive_addh(i8 zeroext 0) + %10 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 4 + store i8 %9, i8 addrspace(1)* %10, align 1 + %11 = tail call spir_func zeroext i8 @_Z40sub_group_non_uniform_scan_inclusive_mulh(i8 zeroext 0) + %12 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 5 + store i8 %11, i8 addrspace(1)* %12, align 1 + %13 = tail call spir_func zeroext i8 @_Z40sub_group_non_uniform_scan_inclusive_minh(i8 zeroext 0) + %14 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 6 + store i8 %13, i8 addrspace(1)* %14, 
align 1 + %15 = tail call spir_func zeroext i8 @_Z40sub_group_non_uniform_scan_inclusive_maxh(i8 zeroext 0) + %16 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 7 + store i8 %15, i8 addrspace(1)* %16, align 1 + %17 = tail call spir_func zeroext i8 @_Z40sub_group_non_uniform_scan_exclusive_addh(i8 zeroext 0) + %18 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 8 + store i8 %17, i8 addrspace(1)* %18, align 1 + %19 = tail call spir_func zeroext i8 @_Z40sub_group_non_uniform_scan_exclusive_mulh(i8 zeroext 0) + %20 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 9 + store i8 %19, i8 addrspace(1)* %20, align 1 + %21 = tail call spir_func zeroext i8 @_Z40sub_group_non_uniform_scan_exclusive_minh(i8 zeroext 0) + %22 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 10 + store i8 %21, i8 addrspace(1)* %22, align 1 + %23 = tail call spir_func zeroext i8 @_Z40sub_group_non_uniform_scan_exclusive_maxh(i8 zeroext 0) + %24 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 11 + store i8 %23, i8 addrspace(1)* %24, align 1 + ret void +} + +declare dso_local spir_func zeroext i8 @_Z32sub_group_non_uniform_reduce_addh(i8 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i8 @_Z32sub_group_non_uniform_reduce_mulh(i8 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i8 @_Z32sub_group_non_uniform_reduce_minh(i8 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i8 @_Z32sub_group_non_uniform_reduce_maxh(i8 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i8 @_Z40sub_group_non_uniform_scan_inclusive_addh(i8 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i8 @_Z40sub_group_non_uniform_scan_inclusive_mulh(i8 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i8 @_Z40sub_group_non_uniform_scan_inclusive_minh(i8 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i8 @_Z40sub_group_non_uniform_scan_inclusive_maxh(i8 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i8 @_Z40sub_group_non_uniform_scan_exclusive_addh(i8 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i8 @_Z40sub_group_non_uniform_scan_exclusive_mulh(i8 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i8 @_Z40sub_group_non_uniform_scan_exclusive_minh(i8 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i8 @_Z40sub_group_non_uniform_scan_exclusive_maxh(i8 zeroext) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#short]] %[[#ScopeSubgroup]] Reduce %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#short]] %[[#ScopeSubgroup]] Reduce %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMin %[[#short]] %[[#ScopeSubgroup]] Reduce %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMax %[[#short]] %[[#ScopeSubgroup]] Reduce %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#short]] %[[#ScopeSubgroup]] InclusiveScan %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#short]] %[[#ScopeSubgroup]] InclusiveScan %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMin %[[#short]] %[[#ScopeSubgroup]] InclusiveScan %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMax %[[#short]] %[[#ScopeSubgroup]] InclusiveScan %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#short]] %[[#ScopeSubgroup]] ExclusiveScan %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#short]] %[[#ScopeSubgroup]] ExclusiveScan %[[#short_0]] +; 
CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMin %[[#short]] %[[#ScopeSubgroup]] ExclusiveScan %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMax %[[#short]] %[[#ScopeSubgroup]] ExclusiveScan %[[#short_0]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testNonUniformArithmeticShort(i16 addrspace(1)* nocapture) local_unnamed_addr { + %2 = tail call spir_func signext i16 @_Z32sub_group_non_uniform_reduce_adds(i16 signext 0) + store i16 %2, i16 addrspace(1)* %0, align 2 + %3 = tail call spir_func signext i16 @_Z32sub_group_non_uniform_reduce_muls(i16 signext 0) + %4 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 1 + store i16 %3, i16 addrspace(1)* %4, align 2 + %5 = tail call spir_func signext i16 @_Z32sub_group_non_uniform_reduce_mins(i16 signext 0) + %6 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 2 + store i16 %5, i16 addrspace(1)* %6, align 2 + %7 = tail call spir_func signext i16 @_Z32sub_group_non_uniform_reduce_maxs(i16 signext 0) + %8 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 3 + store i16 %7, i16 addrspace(1)* %8, align 2 + %9 = tail call spir_func signext i16 @_Z40sub_group_non_uniform_scan_inclusive_adds(i16 signext 0) + %10 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 4 + store i16 %9, i16 addrspace(1)* %10, align 2 + %11 = tail call spir_func signext i16 @_Z40sub_group_non_uniform_scan_inclusive_muls(i16 signext 0) + %12 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 5 + store i16 %11, i16 addrspace(1)* %12, align 2 + %13 = tail call spir_func signext i16 @_Z40sub_group_non_uniform_scan_inclusive_mins(i16 signext 0) + %14 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 6 + store i16 %13, i16 addrspace(1)* %14, align 2 + %15 = tail call spir_func signext i16 @_Z40sub_group_non_uniform_scan_inclusive_maxs(i16 signext 0) + %16 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 7 + store i16 %15, i16 addrspace(1)* %16, align 2 + %17 = tail call spir_func signext i16 @_Z40sub_group_non_uniform_scan_exclusive_adds(i16 signext 0) + %18 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 8 + store i16 %17, i16 addrspace(1)* %18, align 2 + %19 = tail call spir_func signext i16 @_Z40sub_group_non_uniform_scan_exclusive_muls(i16 signext 0) + %20 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 9 + store i16 %19, i16 addrspace(1)* %20, align 2 + %21 = tail call spir_func signext i16 @_Z40sub_group_non_uniform_scan_exclusive_mins(i16 signext 0) + %22 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 10 + store i16 %21, i16 addrspace(1)* %22, align 2 + %23 = tail call spir_func signext i16 @_Z40sub_group_non_uniform_scan_exclusive_maxs(i16 signext 0) + %24 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 11 + store i16 %23, i16 addrspace(1)* %24, align 2 + ret void +} + +declare dso_local spir_func signext i16 @_Z32sub_group_non_uniform_reduce_adds(i16 signext) local_unnamed_addr + +declare dso_local spir_func signext i16 @_Z32sub_group_non_uniform_reduce_muls(i16 signext) local_unnamed_addr + +declare dso_local spir_func signext i16 @_Z32sub_group_non_uniform_reduce_mins(i16 signext) local_unnamed_addr + +declare dso_local spir_func signext i16 @_Z32sub_group_non_uniform_reduce_maxs(i16 signext) local_unnamed_addr + +declare dso_local spir_func signext i16 @_Z40sub_group_non_uniform_scan_inclusive_adds(i16 signext) local_unnamed_addr + +declare dso_local spir_func signext i16 @_Z40sub_group_non_uniform_scan_inclusive_muls(i16 signext) local_unnamed_addr + +declare dso_local 
spir_func signext i16 @_Z40sub_group_non_uniform_scan_inclusive_mins(i16 signext) local_unnamed_addr + +declare dso_local spir_func signext i16 @_Z40sub_group_non_uniform_scan_inclusive_maxs(i16 signext) local_unnamed_addr + +declare dso_local spir_func signext i16 @_Z40sub_group_non_uniform_scan_exclusive_adds(i16 signext) local_unnamed_addr + +declare dso_local spir_func signext i16 @_Z40sub_group_non_uniform_scan_exclusive_muls(i16 signext) local_unnamed_addr + +declare dso_local spir_func signext i16 @_Z40sub_group_non_uniform_scan_exclusive_mins(i16 signext) local_unnamed_addr + +declare dso_local spir_func signext i16 @_Z40sub_group_non_uniform_scan_exclusive_maxs(i16 signext) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#short]] %[[#ScopeSubgroup]] Reduce %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#short]] %[[#ScopeSubgroup]] Reduce %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMin %[[#short]] %[[#ScopeSubgroup]] Reduce %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMax %[[#short]] %[[#ScopeSubgroup]] Reduce %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#short]] %[[#ScopeSubgroup]] InclusiveScan %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#short]] %[[#ScopeSubgroup]] InclusiveScan %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMin %[[#short]] %[[#ScopeSubgroup]] InclusiveScan %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMax %[[#short]] %[[#ScopeSubgroup]] InclusiveScan %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#short]] %[[#ScopeSubgroup]] ExclusiveScan %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#short]] %[[#ScopeSubgroup]] ExclusiveScan %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMin %[[#short]] %[[#ScopeSubgroup]] ExclusiveScan %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMax %[[#short]] %[[#ScopeSubgroup]] ExclusiveScan %[[#short_0]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testNonUniformArithmeticUShort(i16 addrspace(1)* nocapture) local_unnamed_addr { + %2 = tail call spir_func zeroext i16 @_Z32sub_group_non_uniform_reduce_addt(i16 zeroext 0) + store i16 %2, i16 addrspace(1)* %0, align 2 + %3 = tail call spir_func zeroext i16 @_Z32sub_group_non_uniform_reduce_mult(i16 zeroext 0) + %4 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 1 + store i16 %3, i16 addrspace(1)* %4, align 2 + %5 = tail call spir_func zeroext i16 @_Z32sub_group_non_uniform_reduce_mint(i16 zeroext 0) + %6 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 2 + store i16 %5, i16 addrspace(1)* %6, align 2 + %7 = tail call spir_func zeroext i16 @_Z32sub_group_non_uniform_reduce_maxt(i16 zeroext 0) + %8 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 3 + store i16 %7, i16 addrspace(1)* %8, align 2 + %9 = tail call spir_func zeroext i16 @_Z40sub_group_non_uniform_scan_inclusive_addt(i16 zeroext 0) + %10 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 4 + store i16 %9, i16 addrspace(1)* %10, align 2 + %11 = tail call spir_func zeroext i16 @_Z40sub_group_non_uniform_scan_inclusive_mult(i16 zeroext 0) + %12 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 5 + store i16 %11, i16 addrspace(1)* %12, align 2 + %13 = tail call spir_func zeroext i16 @_Z40sub_group_non_uniform_scan_inclusive_mint(i16 zeroext 0) + %14 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 6 + store i16 %13, i16 addrspace(1)* %14, align 2 + %15 = tail call 
spir_func zeroext i16 @_Z40sub_group_non_uniform_scan_inclusive_maxt(i16 zeroext 0) + %16 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 7 + store i16 %15, i16 addrspace(1)* %16, align 2 + %17 = tail call spir_func zeroext i16 @_Z40sub_group_non_uniform_scan_exclusive_addt(i16 zeroext 0) + %18 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 8 + store i16 %17, i16 addrspace(1)* %18, align 2 + %19 = tail call spir_func zeroext i16 @_Z40sub_group_non_uniform_scan_exclusive_mult(i16 zeroext 0) + %20 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 9 + store i16 %19, i16 addrspace(1)* %20, align 2 + %21 = tail call spir_func zeroext i16 @_Z40sub_group_non_uniform_scan_exclusive_mint(i16 zeroext 0) + %22 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 10 + store i16 %21, i16 addrspace(1)* %22, align 2 + %23 = tail call spir_func zeroext i16 @_Z40sub_group_non_uniform_scan_exclusive_maxt(i16 zeroext 0) + %24 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 11 + store i16 %23, i16 addrspace(1)* %24, align 2 + ret void +} + +declare dso_local spir_func zeroext i16 @_Z32sub_group_non_uniform_reduce_addt(i16 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i16 @_Z32sub_group_non_uniform_reduce_mult(i16 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i16 @_Z32sub_group_non_uniform_reduce_mint(i16 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i16 @_Z32sub_group_non_uniform_reduce_maxt(i16 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i16 @_Z40sub_group_non_uniform_scan_inclusive_addt(i16 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i16 @_Z40sub_group_non_uniform_scan_inclusive_mult(i16 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i16 @_Z40sub_group_non_uniform_scan_inclusive_mint(i16 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i16 @_Z40sub_group_non_uniform_scan_inclusive_maxt(i16 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i16 @_Z40sub_group_non_uniform_scan_exclusive_addt(i16 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i16 @_Z40sub_group_non_uniform_scan_exclusive_mult(i16 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i16 @_Z40sub_group_non_uniform_scan_exclusive_mint(i16 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i16 @_Z40sub_group_non_uniform_scan_exclusive_maxt(i16 zeroext) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#int]] %[[#ScopeSubgroup]] Reduce %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#int]] %[[#ScopeSubgroup]] Reduce %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMin %[[#int]] %[[#ScopeSubgroup]] Reduce %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMax %[[#int]] %[[#ScopeSubgroup]] Reduce %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#int]] %[[#ScopeSubgroup]] InclusiveScan %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#int]] %[[#ScopeSubgroup]] InclusiveScan %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMin %[[#int]] %[[#ScopeSubgroup]] InclusiveScan %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMax %[[#int]] %[[#ScopeSubgroup]] InclusiveScan %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#int]] %[[#ScopeSubgroup]] ExclusiveScan %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#int]] %[[#ScopeSubgroup]] ExclusiveScan %[[#int_0]] +; CHECK-SPIRV: %[[#]] 
= OpGroupNonUniformSMin %[[#int]] %[[#ScopeSubgroup]] ExclusiveScan %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMax %[[#int]] %[[#ScopeSubgroup]] ExclusiveScan %[[#int_0]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testNonUniformArithmeticInt(i32 addrspace(1)* nocapture) local_unnamed_addr { + %2 = tail call spir_func i32 @_Z32sub_group_non_uniform_reduce_addi(i32 0) + store i32 %2, i32 addrspace(1)* %0, align 4 + %3 = tail call spir_func i32 @_Z32sub_group_non_uniform_reduce_muli(i32 0) + %4 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 1 + store i32 %3, i32 addrspace(1)* %4, align 4 + %5 = tail call spir_func i32 @_Z32sub_group_non_uniform_reduce_mini(i32 0) + %6 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 2 + store i32 %5, i32 addrspace(1)* %6, align 4 + %7 = tail call spir_func i32 @_Z32sub_group_non_uniform_reduce_maxi(i32 0) + %8 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 3 + store i32 %7, i32 addrspace(1)* %8, align 4 + %9 = tail call spir_func i32 @_Z40sub_group_non_uniform_scan_inclusive_addi(i32 0) + %10 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 4 + store i32 %9, i32 addrspace(1)* %10, align 4 + %11 = tail call spir_func i32 @_Z40sub_group_non_uniform_scan_inclusive_muli(i32 0) + %12 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 5 + store i32 %11, i32 addrspace(1)* %12, align 4 + %13 = tail call spir_func i32 @_Z40sub_group_non_uniform_scan_inclusive_mini(i32 0) + %14 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 6 + store i32 %13, i32 addrspace(1)* %14, align 4 + %15 = tail call spir_func i32 @_Z40sub_group_non_uniform_scan_inclusive_maxi(i32 0) + %16 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 7 + store i32 %15, i32 addrspace(1)* %16, align 4 + %17 = tail call spir_func i32 @_Z40sub_group_non_uniform_scan_exclusive_addi(i32 0) + %18 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 8 + store i32 %17, i32 addrspace(1)* %18, align 4 + %19 = tail call spir_func i32 @_Z40sub_group_non_uniform_scan_exclusive_muli(i32 0) + %20 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 9 + store i32 %19, i32 addrspace(1)* %20, align 4 + %21 = tail call spir_func i32 @_Z40sub_group_non_uniform_scan_exclusive_mini(i32 0) + %22 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 10 + store i32 %21, i32 addrspace(1)* %22, align 4 + %23 = tail call spir_func i32 @_Z40sub_group_non_uniform_scan_exclusive_maxi(i32 0) + %24 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 11 + store i32 %23, i32 addrspace(1)* %24, align 4 + ret void +} + +declare dso_local spir_func i32 @_Z32sub_group_non_uniform_reduce_addi(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z32sub_group_non_uniform_reduce_muli(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z32sub_group_non_uniform_reduce_mini(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z32sub_group_non_uniform_reduce_maxi(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z40sub_group_non_uniform_scan_inclusive_addi(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z40sub_group_non_uniform_scan_inclusive_muli(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z40sub_group_non_uniform_scan_inclusive_mini(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z40sub_group_non_uniform_scan_inclusive_maxi(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z40sub_group_non_uniform_scan_exclusive_addi(i32) local_unnamed_addr + +declare dso_local 
spir_func i32 @_Z40sub_group_non_uniform_scan_exclusive_muli(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z40sub_group_non_uniform_scan_exclusive_mini(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z40sub_group_non_uniform_scan_exclusive_maxi(i32) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#int]] %[[#ScopeSubgroup]] Reduce %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#int]] %[[#ScopeSubgroup]] Reduce %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMin %[[#int]] %[[#ScopeSubgroup]] Reduce %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMax %[[#int]] %[[#ScopeSubgroup]] Reduce %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#int]] %[[#ScopeSubgroup]] InclusiveScan %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#int]] %[[#ScopeSubgroup]] InclusiveScan %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMin %[[#int]] %[[#ScopeSubgroup]] InclusiveScan %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMax %[[#int]] %[[#ScopeSubgroup]] InclusiveScan %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#int]] %[[#ScopeSubgroup]] ExclusiveScan %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#int]] %[[#ScopeSubgroup]] ExclusiveScan %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMin %[[#int]] %[[#ScopeSubgroup]] ExclusiveScan %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMax %[[#int]] %[[#ScopeSubgroup]] ExclusiveScan %[[#int_0]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testNonUniformArithmeticUInt(i32 addrspace(1)* nocapture) local_unnamed_addr { + %2 = tail call spir_func i32 @_Z32sub_group_non_uniform_reduce_addj(i32 0) + store i32 %2, i32 addrspace(1)* %0, align 4 + %3 = tail call spir_func i32 @_Z32sub_group_non_uniform_reduce_mulj(i32 0) + %4 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 1 + store i32 %3, i32 addrspace(1)* %4, align 4 + %5 = tail call spir_func i32 @_Z32sub_group_non_uniform_reduce_minj(i32 0) + %6 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 2 + store i32 %5, i32 addrspace(1)* %6, align 4 + %7 = tail call spir_func i32 @_Z32sub_group_non_uniform_reduce_maxj(i32 0) + %8 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 3 + store i32 %7, i32 addrspace(1)* %8, align 4 + %9 = tail call spir_func i32 @_Z40sub_group_non_uniform_scan_inclusive_addj(i32 0) + %10 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 4 + store i32 %9, i32 addrspace(1)* %10, align 4 + %11 = tail call spir_func i32 @_Z40sub_group_non_uniform_scan_inclusive_mulj(i32 0) + %12 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 5 + store i32 %11, i32 addrspace(1)* %12, align 4 + %13 = tail call spir_func i32 @_Z40sub_group_non_uniform_scan_inclusive_minj(i32 0) + %14 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 6 + store i32 %13, i32 addrspace(1)* %14, align 4 + %15 = tail call spir_func i32 @_Z40sub_group_non_uniform_scan_inclusive_maxj(i32 0) + %16 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 7 + store i32 %15, i32 addrspace(1)* %16, align 4 + %17 = tail call spir_func i32 @_Z40sub_group_non_uniform_scan_exclusive_addj(i32 0) + %18 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 8 + store i32 %17, i32 addrspace(1)* %18, align 4 + %19 = tail call spir_func i32 @_Z40sub_group_non_uniform_scan_exclusive_mulj(i32 0) + %20 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 9 + store i32 %19, i32 addrspace(1)* %20, align 4 + %21 
= tail call spir_func i32 @_Z40sub_group_non_uniform_scan_exclusive_minj(i32 0) + %22 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 10 + store i32 %21, i32 addrspace(1)* %22, align 4 + %23 = tail call spir_func i32 @_Z40sub_group_non_uniform_scan_exclusive_maxj(i32 0) + %24 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 11 + store i32 %23, i32 addrspace(1)* %24, align 4 + ret void +} + +declare dso_local spir_func i32 @_Z32sub_group_non_uniform_reduce_addj(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z32sub_group_non_uniform_reduce_mulj(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z32sub_group_non_uniform_reduce_minj(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z32sub_group_non_uniform_reduce_maxj(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z40sub_group_non_uniform_scan_inclusive_addj(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z40sub_group_non_uniform_scan_inclusive_mulj(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z40sub_group_non_uniform_scan_inclusive_minj(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z40sub_group_non_uniform_scan_inclusive_maxj(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z40sub_group_non_uniform_scan_exclusive_addj(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z40sub_group_non_uniform_scan_exclusive_mulj(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z40sub_group_non_uniform_scan_exclusive_minj(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z40sub_group_non_uniform_scan_exclusive_maxj(i32) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#long]] %[[#ScopeSubgroup]] Reduce %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#long]] %[[#ScopeSubgroup]] Reduce %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMin %[[#long]] %[[#ScopeSubgroup]] Reduce %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMax %[[#long]] %[[#ScopeSubgroup]] Reduce %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#long]] %[[#ScopeSubgroup]] InclusiveScan %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#long]] %[[#ScopeSubgroup]] InclusiveScan %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMin %[[#long]] %[[#ScopeSubgroup]] InclusiveScan %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMax %[[#long]] %[[#ScopeSubgroup]] InclusiveScan %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#long]] %[[#ScopeSubgroup]] ExclusiveScan %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#long]] %[[#ScopeSubgroup]] ExclusiveScan %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMin %[[#long]] %[[#ScopeSubgroup]] ExclusiveScan %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMax %[[#long]] %[[#ScopeSubgroup]] ExclusiveScan %[[#long_0]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testNonUniformArithmeticLong(i64 addrspace(1)* nocapture) local_unnamed_addr { + %2 = tail call spir_func i64 @_Z32sub_group_non_uniform_reduce_addl(i64 0) + store i64 %2, i64 addrspace(1)* %0, align 8 + %3 = tail call spir_func i64 @_Z32sub_group_non_uniform_reduce_mull(i64 0) + %4 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 1 + store i64 %3, i64 addrspace(1)* %4, align 8 + %5 = tail call spir_func i64 @_Z32sub_group_non_uniform_reduce_minl(i64 0) + %6 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 2 + store i64 %5, i64 addrspace(1)* %6, align 8 + %7 
= tail call spir_func i64 @_Z32sub_group_non_uniform_reduce_maxl(i64 0) + %8 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 3 + store i64 %7, i64 addrspace(1)* %8, align 8 + %9 = tail call spir_func i64 @_Z40sub_group_non_uniform_scan_inclusive_addl(i64 0) + %10 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 4 + store i64 %9, i64 addrspace(1)* %10, align 8 + %11 = tail call spir_func i64 @_Z40sub_group_non_uniform_scan_inclusive_mull(i64 0) + %12 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 5 + store i64 %11, i64 addrspace(1)* %12, align 8 + %13 = tail call spir_func i64 @_Z40sub_group_non_uniform_scan_inclusive_minl(i64 0) + %14 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 6 + store i64 %13, i64 addrspace(1)* %14, align 8 + %15 = tail call spir_func i64 @_Z40sub_group_non_uniform_scan_inclusive_maxl(i64 0) + %16 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 7 + store i64 %15, i64 addrspace(1)* %16, align 8 + %17 = tail call spir_func i64 @_Z40sub_group_non_uniform_scan_exclusive_addl(i64 0) + %18 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 8 + store i64 %17, i64 addrspace(1)* %18, align 8 + %19 = tail call spir_func i64 @_Z40sub_group_non_uniform_scan_exclusive_mull(i64 0) + %20 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 9 + store i64 %19, i64 addrspace(1)* %20, align 8 + %21 = tail call spir_func i64 @_Z40sub_group_non_uniform_scan_exclusive_minl(i64 0) + %22 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 10 + store i64 %21, i64 addrspace(1)* %22, align 8 + %23 = tail call spir_func i64 @_Z40sub_group_non_uniform_scan_exclusive_maxl(i64 0) + %24 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 11 + store i64 %23, i64 addrspace(1)* %24, align 8 + ret void +} + +declare dso_local spir_func i64 @_Z32sub_group_non_uniform_reduce_addl(i64) local_unnamed_addr + +declare dso_local spir_func i64 @_Z32sub_group_non_uniform_reduce_mull(i64) local_unnamed_addr + +declare dso_local spir_func i64 @_Z32sub_group_non_uniform_reduce_minl(i64) local_unnamed_addr + +declare dso_local spir_func i64 @_Z32sub_group_non_uniform_reduce_maxl(i64) local_unnamed_addr + +declare dso_local spir_func i64 @_Z40sub_group_non_uniform_scan_inclusive_addl(i64) local_unnamed_addr + +declare dso_local spir_func i64 @_Z40sub_group_non_uniform_scan_inclusive_mull(i64) local_unnamed_addr + +declare dso_local spir_func i64 @_Z40sub_group_non_uniform_scan_inclusive_minl(i64) local_unnamed_addr + +declare dso_local spir_func i64 @_Z40sub_group_non_uniform_scan_inclusive_maxl(i64) local_unnamed_addr + +declare dso_local spir_func i64 @_Z40sub_group_non_uniform_scan_exclusive_addl(i64) local_unnamed_addr + +declare dso_local spir_func i64 @_Z40sub_group_non_uniform_scan_exclusive_mull(i64) local_unnamed_addr + +declare dso_local spir_func i64 @_Z40sub_group_non_uniform_scan_exclusive_minl(i64) local_unnamed_addr + +declare dso_local spir_func i64 @_Z40sub_group_non_uniform_scan_exclusive_maxl(i64) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#long]] %[[#ScopeSubgroup]] Reduce %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#long]] %[[#ScopeSubgroup]] Reduce %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMin %[[#long]] %[[#ScopeSubgroup]] Reduce %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMax %[[#long]] %[[#ScopeSubgroup]] Reduce %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#long]] %[[#ScopeSubgroup]] InclusiveScan %[[#long_0]] +; 
CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#long]] %[[#ScopeSubgroup]] InclusiveScan %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMin %[[#long]] %[[#ScopeSubgroup]] InclusiveScan %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMax %[[#long]] %[[#ScopeSubgroup]] InclusiveScan %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#long]] %[[#ScopeSubgroup]] ExclusiveScan %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#long]] %[[#ScopeSubgroup]] ExclusiveScan %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMin %[[#long]] %[[#ScopeSubgroup]] ExclusiveScan %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMax %[[#long]] %[[#ScopeSubgroup]] ExclusiveScan %[[#long_0]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testNonUniformArithmeticULong(i64 addrspace(1)* nocapture) local_unnamed_addr { + %2 = tail call spir_func i64 @_Z32sub_group_non_uniform_reduce_addm(i64 0) + store i64 %2, i64 addrspace(1)* %0, align 8 + %3 = tail call spir_func i64 @_Z32sub_group_non_uniform_reduce_mulm(i64 0) + %4 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 1 + store i64 %3, i64 addrspace(1)* %4, align 8 + %5 = tail call spir_func i64 @_Z32sub_group_non_uniform_reduce_minm(i64 0) + %6 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 2 + store i64 %5, i64 addrspace(1)* %6, align 8 + %7 = tail call spir_func i64 @_Z32sub_group_non_uniform_reduce_maxm(i64 0) + %8 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 3 + store i64 %7, i64 addrspace(1)* %8, align 8 + %9 = tail call spir_func i64 @_Z40sub_group_non_uniform_scan_inclusive_addm(i64 0) + %10 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 4 + store i64 %9, i64 addrspace(1)* %10, align 8 + %11 = tail call spir_func i64 @_Z40sub_group_non_uniform_scan_inclusive_mulm(i64 0) + %12 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 5 + store i64 %11, i64 addrspace(1)* %12, align 8 + %13 = tail call spir_func i64 @_Z40sub_group_non_uniform_scan_inclusive_minm(i64 0) + %14 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 6 + store i64 %13, i64 addrspace(1)* %14, align 8 + %15 = tail call spir_func i64 @_Z40sub_group_non_uniform_scan_inclusive_maxm(i64 0) + %16 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 7 + store i64 %15, i64 addrspace(1)* %16, align 8 + %17 = tail call spir_func i64 @_Z40sub_group_non_uniform_scan_exclusive_addm(i64 0) + %18 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 8 + store i64 %17, i64 addrspace(1)* %18, align 8 + %19 = tail call spir_func i64 @_Z40sub_group_non_uniform_scan_exclusive_mulm(i64 0) + %20 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 9 + store i64 %19, i64 addrspace(1)* %20, align 8 + %21 = tail call spir_func i64 @_Z40sub_group_non_uniform_scan_exclusive_minm(i64 0) + %22 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 10 + store i64 %21, i64 addrspace(1)* %22, align 8 + %23 = tail call spir_func i64 @_Z40sub_group_non_uniform_scan_exclusive_maxm(i64 0) + %24 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 11 + store i64 %23, i64 addrspace(1)* %24, align 8 + ret void +} + +declare dso_local spir_func i64 @_Z32sub_group_non_uniform_reduce_addm(i64) local_unnamed_addr + +declare dso_local spir_func i64 @_Z32sub_group_non_uniform_reduce_mulm(i64) local_unnamed_addr + +declare dso_local spir_func i64 @_Z32sub_group_non_uniform_reduce_minm(i64) local_unnamed_addr + +declare dso_local spir_func i64 @_Z32sub_group_non_uniform_reduce_maxm(i64) local_unnamed_addr + 
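+;; Note on the checks around these declarations: the builtin's Itanium-style
+;; mangling suffix encodes the operand type and thereby the SPIR-V opcode
+;; flavor asserted above. Signed integer suffixes (c, s, i, l) are expected to
+;; lower to OpGroupNonUniformSMin/SMax, unsigned suffixes (h, t, j, m) to
+;; OpGroupNonUniformUMin/UMax, and floating-point suffixes (Dh, f, d) to
+;; OpGroupNonUniformFMin/FMax; add/mul lower to IAdd/IMul for integers and
+;; FAdd/FMul for floating-point types regardless of signedness.
+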
+declare dso_local spir_func i64 @_Z40sub_group_non_uniform_scan_inclusive_addm(i64) local_unnamed_addr + +declare dso_local spir_func i64 @_Z40sub_group_non_uniform_scan_inclusive_mulm(i64) local_unnamed_addr + +declare dso_local spir_func i64 @_Z40sub_group_non_uniform_scan_inclusive_minm(i64) local_unnamed_addr + +declare dso_local spir_func i64 @_Z40sub_group_non_uniform_scan_inclusive_maxm(i64) local_unnamed_addr + +declare dso_local spir_func i64 @_Z40sub_group_non_uniform_scan_exclusive_addm(i64) local_unnamed_addr + +declare dso_local spir_func i64 @_Z40sub_group_non_uniform_scan_exclusive_mulm(i64) local_unnamed_addr + +declare dso_local spir_func i64 @_Z40sub_group_non_uniform_scan_exclusive_minm(i64) local_unnamed_addr + +declare dso_local spir_func i64 @_Z40sub_group_non_uniform_scan_exclusive_maxm(i64) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFAdd %[[#float]] %[[#ScopeSubgroup]] Reduce %[[#float_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMul %[[#float]] %[[#ScopeSubgroup]] Reduce %[[#float_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMin %[[#float]] %[[#ScopeSubgroup]] Reduce %[[#float_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMax %[[#float]] %[[#ScopeSubgroup]] Reduce %[[#float_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFAdd %[[#float]] %[[#ScopeSubgroup]] InclusiveScan %[[#float_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMul %[[#float]] %[[#ScopeSubgroup]] InclusiveScan %[[#float_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMin %[[#float]] %[[#ScopeSubgroup]] InclusiveScan %[[#float_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMax %[[#float]] %[[#ScopeSubgroup]] InclusiveScan %[[#float_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFAdd %[[#float]] %[[#ScopeSubgroup]] ExclusiveScan %[[#float_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMul %[[#float]] %[[#ScopeSubgroup]] ExclusiveScan %[[#float_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMin %[[#float]] %[[#ScopeSubgroup]] ExclusiveScan %[[#float_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMax %[[#float]] %[[#ScopeSubgroup]] ExclusiveScan %[[#float_0]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testNonUniformArithmeticFloat(float addrspace(1)* nocapture) local_unnamed_addr { + %2 = tail call spir_func float @_Z32sub_group_non_uniform_reduce_addf(float 0.000000e+00) + store float %2, float addrspace(1)* %0, align 4 + %3 = tail call spir_func float @_Z32sub_group_non_uniform_reduce_mulf(float 0.000000e+00) + %4 = getelementptr inbounds float, float addrspace(1)* %0, i64 1 + store float %3, float addrspace(1)* %4, align 4 + %5 = tail call spir_func float @_Z32sub_group_non_uniform_reduce_minf(float 0.000000e+00) + %6 = getelementptr inbounds float, float addrspace(1)* %0, i64 2 + store float %5, float addrspace(1)* %6, align 4 + %7 = tail call spir_func float @_Z32sub_group_non_uniform_reduce_maxf(float 0.000000e+00) + %8 = getelementptr inbounds float, float addrspace(1)* %0, i64 3 + store float %7, float addrspace(1)* %8, align 4 + %9 = tail call spir_func float @_Z40sub_group_non_uniform_scan_inclusive_addf(float 0.000000e+00) + %10 = getelementptr inbounds float, float addrspace(1)* %0, i64 4 + store float %9, float addrspace(1)* %10, align 4 + %11 = tail call spir_func float @_Z40sub_group_non_uniform_scan_inclusive_mulf(float 0.000000e+00) + %12 = getelementptr inbounds float, float addrspace(1)* %0, i64 5 + store float %11, float addrspace(1)* %12, align 4 + %13 = tail call spir_func float 
@_Z40sub_group_non_uniform_scan_inclusive_minf(float 0.000000e+00) + %14 = getelementptr inbounds float, float addrspace(1)* %0, i64 6 + store float %13, float addrspace(1)* %14, align 4 + %15 = tail call spir_func float @_Z40sub_group_non_uniform_scan_inclusive_maxf(float 0.000000e+00) + %16 = getelementptr inbounds float, float addrspace(1)* %0, i64 7 + store float %15, float addrspace(1)* %16, align 4 + %17 = tail call spir_func float @_Z40sub_group_non_uniform_scan_exclusive_addf(float 0.000000e+00) + %18 = getelementptr inbounds float, float addrspace(1)* %0, i64 8 + store float %17, float addrspace(1)* %18, align 4 + %19 = tail call spir_func float @_Z40sub_group_non_uniform_scan_exclusive_mulf(float 0.000000e+00) + %20 = getelementptr inbounds float, float addrspace(1)* %0, i64 9 + store float %19, float addrspace(1)* %20, align 4 + %21 = tail call spir_func float @_Z40sub_group_non_uniform_scan_exclusive_minf(float 0.000000e+00) + %22 = getelementptr inbounds float, float addrspace(1)* %0, i64 10 + store float %21, float addrspace(1)* %22, align 4 + %23 = tail call spir_func float @_Z40sub_group_non_uniform_scan_exclusive_maxf(float 0.000000e+00) + %24 = getelementptr inbounds float, float addrspace(1)* %0, i64 11 + store float %23, float addrspace(1)* %24, align 4 + ret void +} + +declare dso_local spir_func float @_Z32sub_group_non_uniform_reduce_addf(float) local_unnamed_addr + +declare dso_local spir_func float @_Z32sub_group_non_uniform_reduce_mulf(float) local_unnamed_addr + +declare dso_local spir_func float @_Z32sub_group_non_uniform_reduce_minf(float) local_unnamed_addr + +declare dso_local spir_func float @_Z32sub_group_non_uniform_reduce_maxf(float) local_unnamed_addr + +declare dso_local spir_func float @_Z40sub_group_non_uniform_scan_inclusive_addf(float) local_unnamed_addr + +declare dso_local spir_func float @_Z40sub_group_non_uniform_scan_inclusive_mulf(float) local_unnamed_addr + +declare dso_local spir_func float @_Z40sub_group_non_uniform_scan_inclusive_minf(float) local_unnamed_addr + +declare dso_local spir_func float @_Z40sub_group_non_uniform_scan_inclusive_maxf(float) local_unnamed_addr + +declare dso_local spir_func float @_Z40sub_group_non_uniform_scan_exclusive_addf(float) local_unnamed_addr + +declare dso_local spir_func float @_Z40sub_group_non_uniform_scan_exclusive_mulf(float) local_unnamed_addr + +declare dso_local spir_func float @_Z40sub_group_non_uniform_scan_exclusive_minf(float) local_unnamed_addr + +declare dso_local spir_func float @_Z40sub_group_non_uniform_scan_exclusive_maxf(float) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFAdd %[[#half]] %[[#ScopeSubgroup]] Reduce %[[#half_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMul %[[#half]] %[[#ScopeSubgroup]] Reduce %[[#half_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMin %[[#half]] %[[#ScopeSubgroup]] Reduce %[[#half_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMax %[[#half]] %[[#ScopeSubgroup]] Reduce %[[#half_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFAdd %[[#half]] %[[#ScopeSubgroup]] InclusiveScan %[[#half_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMul %[[#half]] %[[#ScopeSubgroup]] InclusiveScan %[[#half_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMin %[[#half]] %[[#ScopeSubgroup]] InclusiveScan %[[#half_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMax %[[#half]] %[[#ScopeSubgroup]] InclusiveScan %[[#half_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFAdd %[[#half]] %[[#ScopeSubgroup]] ExclusiveScan %[[#half_0]] +; 
CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMul %[[#half]] %[[#ScopeSubgroup]] ExclusiveScan %[[#half_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMin %[[#half]] %[[#ScopeSubgroup]] ExclusiveScan %[[#half_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMax %[[#half]] %[[#ScopeSubgroup]] ExclusiveScan %[[#half_0]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testNonUniformArithmeticHalf(half addrspace(1)* nocapture) local_unnamed_addr { + %2 = tail call spir_func half @_Z32sub_group_non_uniform_reduce_addDh(half 0xH0000) + store half %2, half addrspace(1)* %0, align 2 + %3 = tail call spir_func half @_Z32sub_group_non_uniform_reduce_mulDh(half 0xH0000) + %4 = getelementptr inbounds half, half addrspace(1)* %0, i64 1 + store half %3, half addrspace(1)* %4, align 2 + %5 = tail call spir_func half @_Z32sub_group_non_uniform_reduce_minDh(half 0xH0000) + %6 = getelementptr inbounds half, half addrspace(1)* %0, i64 2 + store half %5, half addrspace(1)* %6, align 2 + %7 = tail call spir_func half @_Z32sub_group_non_uniform_reduce_maxDh(half 0xH0000) + %8 = getelementptr inbounds half, half addrspace(1)* %0, i64 3 + store half %7, half addrspace(1)* %8, align 2 + %9 = tail call spir_func half @_Z40sub_group_non_uniform_scan_inclusive_addDh(half 0xH0000) + %10 = getelementptr inbounds half, half addrspace(1)* %0, i64 4 + store half %9, half addrspace(1)* %10, align 2 + %11 = tail call spir_func half @_Z40sub_group_non_uniform_scan_inclusive_mulDh(half 0xH0000) + %12 = getelementptr inbounds half, half addrspace(1)* %0, i64 5 + store half %11, half addrspace(1)* %12, align 2 + %13 = tail call spir_func half @_Z40sub_group_non_uniform_scan_inclusive_minDh(half 0xH0000) + %14 = getelementptr inbounds half, half addrspace(1)* %0, i64 6 + store half %13, half addrspace(1)* %14, align 2 + %15 = tail call spir_func half @_Z40sub_group_non_uniform_scan_inclusive_maxDh(half 0xH0000) + %16 = getelementptr inbounds half, half addrspace(1)* %0, i64 7 + store half %15, half addrspace(1)* %16, align 2 + %17 = tail call spir_func half @_Z40sub_group_non_uniform_scan_exclusive_addDh(half 0xH0000) + %18 = getelementptr inbounds half, half addrspace(1)* %0, i64 8 + store half %17, half addrspace(1)* %18, align 2 + %19 = tail call spir_func half @_Z40sub_group_non_uniform_scan_exclusive_mulDh(half 0xH0000) + %20 = getelementptr inbounds half, half addrspace(1)* %0, i64 9 + store half %19, half addrspace(1)* %20, align 2 + %21 = tail call spir_func half @_Z40sub_group_non_uniform_scan_exclusive_minDh(half 0xH0000) + %22 = getelementptr inbounds half, half addrspace(1)* %0, i64 10 + store half %21, half addrspace(1)* %22, align 2 + %23 = tail call spir_func half @_Z40sub_group_non_uniform_scan_exclusive_maxDh(half 0xH0000) + %24 = getelementptr inbounds half, half addrspace(1)* %0, i64 11 + store half %23, half addrspace(1)* %24, align 2 + ret void +} + +declare dso_local spir_func half @_Z32sub_group_non_uniform_reduce_addDh(half) local_unnamed_addr + +declare dso_local spir_func half @_Z32sub_group_non_uniform_reduce_mulDh(half) local_unnamed_addr + +declare dso_local spir_func half @_Z32sub_group_non_uniform_reduce_minDh(half) local_unnamed_addr + +declare dso_local spir_func half @_Z32sub_group_non_uniform_reduce_maxDh(half) local_unnamed_addr + +declare dso_local spir_func half @_Z40sub_group_non_uniform_scan_inclusive_addDh(half) local_unnamed_addr + +declare dso_local spir_func half @_Z40sub_group_non_uniform_scan_inclusive_mulDh(half) local_unnamed_addr + +declare dso_local spir_func half 
@_Z40sub_group_non_uniform_scan_inclusive_minDh(half) local_unnamed_addr + +declare dso_local spir_func half @_Z40sub_group_non_uniform_scan_inclusive_maxDh(half) local_unnamed_addr + +declare dso_local spir_func half @_Z40sub_group_non_uniform_scan_exclusive_addDh(half) local_unnamed_addr + +declare dso_local spir_func half @_Z40sub_group_non_uniform_scan_exclusive_mulDh(half) local_unnamed_addr + +declare dso_local spir_func half @_Z40sub_group_non_uniform_scan_exclusive_minDh(half) local_unnamed_addr + +declare dso_local spir_func half @_Z40sub_group_non_uniform_scan_exclusive_maxDh(half) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFAdd %[[#double]] %[[#ScopeSubgroup]] Reduce %[[#double_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMul %[[#double]] %[[#ScopeSubgroup]] Reduce %[[#double_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMin %[[#double]] %[[#ScopeSubgroup]] Reduce %[[#double_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMax %[[#double]] %[[#ScopeSubgroup]] Reduce %[[#double_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFAdd %[[#double]] %[[#ScopeSubgroup]] InclusiveScan %[[#double_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMul %[[#double]] %[[#ScopeSubgroup]] InclusiveScan %[[#double_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMin %[[#double]] %[[#ScopeSubgroup]] InclusiveScan %[[#double_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMax %[[#double]] %[[#ScopeSubgroup]] InclusiveScan %[[#double_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFAdd %[[#double]] %[[#ScopeSubgroup]] ExclusiveScan %[[#double_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMul %[[#double]] %[[#ScopeSubgroup]] ExclusiveScan %[[#double_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMin %[[#double]] %[[#ScopeSubgroup]] ExclusiveScan %[[#double_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMax %[[#double]] %[[#ScopeSubgroup]] ExclusiveScan %[[#double_0]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testNonUniformArithmeticDouble(double addrspace(1)* nocapture) local_unnamed_addr { + %2 = tail call spir_func double @_Z32sub_group_non_uniform_reduce_addd(double 0.000000e+00) + store double %2, double addrspace(1)* %0, align 8 + %3 = tail call spir_func double @_Z32sub_group_non_uniform_reduce_muld(double 0.000000e+00) + %4 = getelementptr inbounds double, double addrspace(1)* %0, i64 1 + store double %3, double addrspace(1)* %4, align 8 + %5 = tail call spir_func double @_Z32sub_group_non_uniform_reduce_mind(double 0.000000e+00) + %6 = getelementptr inbounds double, double addrspace(1)* %0, i64 2 + store double %5, double addrspace(1)* %6, align 8 + %7 = tail call spir_func double @_Z32sub_group_non_uniform_reduce_maxd(double 0.000000e+00) + %8 = getelementptr inbounds double, double addrspace(1)* %0, i64 3 + store double %7, double addrspace(1)* %8, align 8 + %9 = tail call spir_func double @_Z40sub_group_non_uniform_scan_inclusive_addd(double 0.000000e+00) + %10 = getelementptr inbounds double, double addrspace(1)* %0, i64 4 + store double %9, double addrspace(1)* %10, align 8 + %11 = tail call spir_func double @_Z40sub_group_non_uniform_scan_inclusive_muld(double 0.000000e+00) + %12 = getelementptr inbounds double, double addrspace(1)* %0, i64 5 + store double %11, double addrspace(1)* %12, align 8 + %13 = tail call spir_func double @_Z40sub_group_non_uniform_scan_inclusive_mind(double 0.000000e+00) + %14 = getelementptr inbounds double, double addrspace(1)* %0, i64 6 + store double %13, double addrspace(1)* %14, align 8 + %15 
= tail call spir_func double @_Z40sub_group_non_uniform_scan_inclusive_maxd(double 0.000000e+00) + %16 = getelementptr inbounds double, double addrspace(1)* %0, i64 7 + store double %15, double addrspace(1)* %16, align 8 + %17 = tail call spir_func double @_Z40sub_group_non_uniform_scan_exclusive_addd(double 0.000000e+00) + %18 = getelementptr inbounds double, double addrspace(1)* %0, i64 8 + store double %17, double addrspace(1)* %18, align 8 + %19 = tail call spir_func double @_Z40sub_group_non_uniform_scan_exclusive_muld(double 0.000000e+00) + %20 = getelementptr inbounds double, double addrspace(1)* %0, i64 9 + store double %19, double addrspace(1)* %20, align 8 + %21 = tail call spir_func double @_Z40sub_group_non_uniform_scan_exclusive_mind(double 0.000000e+00) + %22 = getelementptr inbounds double, double addrspace(1)* %0, i64 10 + store double %21, double addrspace(1)* %22, align 8 + %23 = tail call spir_func double @_Z40sub_group_non_uniform_scan_exclusive_maxd(double 0.000000e+00) + %24 = getelementptr inbounds double, double addrspace(1)* %0, i64 11 + store double %23, double addrspace(1)* %24, align 8 + ret void +} + +declare dso_local spir_func double @_Z32sub_group_non_uniform_reduce_addd(double) local_unnamed_addr + +declare dso_local spir_func double @_Z32sub_group_non_uniform_reduce_muld(double) local_unnamed_addr + +declare dso_local spir_func double @_Z32sub_group_non_uniform_reduce_mind(double) local_unnamed_addr + +declare dso_local spir_func double @_Z32sub_group_non_uniform_reduce_maxd(double) local_unnamed_addr + +declare dso_local spir_func double @_Z40sub_group_non_uniform_scan_inclusive_addd(double) local_unnamed_addr + +declare dso_local spir_func double @_Z40sub_group_non_uniform_scan_inclusive_muld(double) local_unnamed_addr + +declare dso_local spir_func double @_Z40sub_group_non_uniform_scan_inclusive_mind(double) local_unnamed_addr + +declare dso_local spir_func double @_Z40sub_group_non_uniform_scan_inclusive_maxd(double) local_unnamed_addr + +declare dso_local spir_func double @_Z40sub_group_non_uniform_scan_exclusive_addd(double) local_unnamed_addr + +declare dso_local spir_func double @_Z40sub_group_non_uniform_scan_exclusive_muld(double) local_unnamed_addr + +declare dso_local spir_func double @_Z40sub_group_non_uniform_scan_exclusive_mind(double) local_unnamed_addr + +declare dso_local spir_func double @_Z40sub_group_non_uniform_scan_exclusive_maxd(double) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#char]] %[[#ScopeSubgroup]] Reduce %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#char]] %[[#ScopeSubgroup]] Reduce %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#char]] %[[#ScopeSubgroup]] Reduce %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#char]] %[[#ScopeSubgroup]] InclusiveScan %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#char]] %[[#ScopeSubgroup]] InclusiveScan %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#char]] %[[#ScopeSubgroup]] InclusiveScan %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#char]] %[[#ScopeSubgroup]] ExclusiveScan %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#char]] %[[#ScopeSubgroup]] ExclusiveScan %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#char]] %[[#ScopeSubgroup]] ExclusiveScan %[[#char_0]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void 
@testNonUniformBitwiseChar(i8 addrspace(1)* nocapture) local_unnamed_addr { + %2 = tail call spir_func signext i8 @_Z32sub_group_non_uniform_reduce_andc(i8 signext 0) + store i8 %2, i8 addrspace(1)* %0, align 1 + %3 = tail call spir_func signext i8 @_Z31sub_group_non_uniform_reduce_orc(i8 signext 0) + %4 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 1 + store i8 %3, i8 addrspace(1)* %4, align 1 + %5 = tail call spir_func signext i8 @_Z32sub_group_non_uniform_reduce_xorc(i8 signext 0) + %6 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 2 + store i8 %5, i8 addrspace(1)* %6, align 1 + %7 = tail call spir_func signext i8 @_Z40sub_group_non_uniform_scan_inclusive_andc(i8 signext 0) + %8 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 3 + store i8 %7, i8 addrspace(1)* %8, align 1 + %9 = tail call spir_func signext i8 @_Z39sub_group_non_uniform_scan_inclusive_orc(i8 signext 0) + %10 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 4 + store i8 %9, i8 addrspace(1)* %10, align 1 + %11 = tail call spir_func signext i8 @_Z40sub_group_non_uniform_scan_inclusive_xorc(i8 signext 0) + %12 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 5 + store i8 %11, i8 addrspace(1)* %12, align 1 + %13 = tail call spir_func signext i8 @_Z40sub_group_non_uniform_scan_exclusive_andc(i8 signext 0) + %14 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 6 + store i8 %13, i8 addrspace(1)* %14, align 1 + %15 = tail call spir_func signext i8 @_Z39sub_group_non_uniform_scan_exclusive_orc(i8 signext 0) + %16 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 7 + store i8 %15, i8 addrspace(1)* %16, align 1 + %17 = tail call spir_func signext i8 @_Z40sub_group_non_uniform_scan_exclusive_xorc(i8 signext 0) + %18 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 8 + store i8 %17, i8 addrspace(1)* %18, align 1 + ret void +} + +declare dso_local spir_func signext i8 @_Z32sub_group_non_uniform_reduce_andc(i8 signext) local_unnamed_addr + +declare dso_local spir_func signext i8 @_Z31sub_group_non_uniform_reduce_orc(i8 signext) local_unnamed_addr + +declare dso_local spir_func signext i8 @_Z32sub_group_non_uniform_reduce_xorc(i8 signext) local_unnamed_addr + +declare dso_local spir_func signext i8 @_Z40sub_group_non_uniform_scan_inclusive_andc(i8 signext) local_unnamed_addr + +declare dso_local spir_func signext i8 @_Z39sub_group_non_uniform_scan_inclusive_orc(i8 signext) local_unnamed_addr + +declare dso_local spir_func signext i8 @_Z40sub_group_non_uniform_scan_inclusive_xorc(i8 signext) local_unnamed_addr + +declare dso_local spir_func signext i8 @_Z40sub_group_non_uniform_scan_exclusive_andc(i8 signext) local_unnamed_addr + +declare dso_local spir_func signext i8 @_Z39sub_group_non_uniform_scan_exclusive_orc(i8 signext) local_unnamed_addr + +declare dso_local spir_func signext i8 @_Z40sub_group_non_uniform_scan_exclusive_xorc(i8 signext) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#char]] %[[#ScopeSubgroup]] Reduce %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#char]] %[[#ScopeSubgroup]] Reduce %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#char]] %[[#ScopeSubgroup]] Reduce %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#char]] %[[#ScopeSubgroup]] InclusiveScan %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#char]] %[[#ScopeSubgroup]] InclusiveScan %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#char]] %[[#ScopeSubgroup]] 
InclusiveScan %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#char]] %[[#ScopeSubgroup]] ExclusiveScan %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#char]] %[[#ScopeSubgroup]] ExclusiveScan %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#char]] %[[#ScopeSubgroup]] ExclusiveScan %[[#char_0]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testNonUniformBitwiseUChar(i8 addrspace(1)* nocapture) local_unnamed_addr { + %2 = tail call spir_func zeroext i8 @_Z32sub_group_non_uniform_reduce_andh(i8 zeroext 0) + store i8 %2, i8 addrspace(1)* %0, align 1 + %3 = tail call spir_func zeroext i8 @_Z31sub_group_non_uniform_reduce_orh(i8 zeroext 0) + %4 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 1 + store i8 %3, i8 addrspace(1)* %4, align 1 + %5 = tail call spir_func zeroext i8 @_Z32sub_group_non_uniform_reduce_xorh(i8 zeroext 0) + %6 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 2 + store i8 %5, i8 addrspace(1)* %6, align 1 + %7 = tail call spir_func zeroext i8 @_Z40sub_group_non_uniform_scan_inclusive_andh(i8 zeroext 0) + %8 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 3 + store i8 %7, i8 addrspace(1)* %8, align 1 + %9 = tail call spir_func zeroext i8 @_Z39sub_group_non_uniform_scan_inclusive_orh(i8 zeroext 0) + %10 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 4 + store i8 %9, i8 addrspace(1)* %10, align 1 + %11 = tail call spir_func zeroext i8 @_Z40sub_group_non_uniform_scan_inclusive_xorh(i8 zeroext 0) + %12 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 5 + store i8 %11, i8 addrspace(1)* %12, align 1 + %13 = tail call spir_func zeroext i8 @_Z40sub_group_non_uniform_scan_exclusive_andh(i8 zeroext 0) + %14 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 6 + store i8 %13, i8 addrspace(1)* %14, align 1 + %15 = tail call spir_func zeroext i8 @_Z39sub_group_non_uniform_scan_exclusive_orh(i8 zeroext 0) + %16 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 7 + store i8 %15, i8 addrspace(1)* %16, align 1 + %17 = tail call spir_func zeroext i8 @_Z40sub_group_non_uniform_scan_exclusive_xorh(i8 zeroext 0) + %18 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 8 + store i8 %17, i8 addrspace(1)* %18, align 1 + ret void +} + +declare dso_local spir_func zeroext i8 @_Z32sub_group_non_uniform_reduce_andh(i8 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i8 @_Z31sub_group_non_uniform_reduce_orh(i8 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i8 @_Z32sub_group_non_uniform_reduce_xorh(i8 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i8 @_Z40sub_group_non_uniform_scan_inclusive_andh(i8 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i8 @_Z39sub_group_non_uniform_scan_inclusive_orh(i8 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i8 @_Z40sub_group_non_uniform_scan_inclusive_xorh(i8 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i8 @_Z40sub_group_non_uniform_scan_exclusive_andh(i8 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i8 @_Z39sub_group_non_uniform_scan_exclusive_orh(i8 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i8 @_Z40sub_group_non_uniform_scan_exclusive_xorh(i8 zeroext) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#short]] %[[#ScopeSubgroup]] Reduce %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#short]] %[[#ScopeSubgroup]] 
Reduce %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#short]] %[[#ScopeSubgroup]] Reduce %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#short]] %[[#ScopeSubgroup]] InclusiveScan %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#short]] %[[#ScopeSubgroup]] InclusiveScan %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#short]] %[[#ScopeSubgroup]] InclusiveScan %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#short]] %[[#ScopeSubgroup]] ExclusiveScan %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#short]] %[[#ScopeSubgroup]] ExclusiveScan %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#short]] %[[#ScopeSubgroup]] ExclusiveScan %[[#short_0]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testNonUniformBitwiseShort(i16 addrspace(1)* nocapture) local_unnamed_addr { + %2 = tail call spir_func signext i16 @_Z32sub_group_non_uniform_reduce_ands(i16 signext 0) + store i16 %2, i16 addrspace(1)* %0, align 2 + %3 = tail call spir_func signext i16 @_Z31sub_group_non_uniform_reduce_ors(i16 signext 0) + %4 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 1 + store i16 %3, i16 addrspace(1)* %4, align 2 + %5 = tail call spir_func signext i16 @_Z32sub_group_non_uniform_reduce_xors(i16 signext 0) + %6 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 2 + store i16 %5, i16 addrspace(1)* %6, align 2 + %7 = tail call spir_func signext i16 @_Z40sub_group_non_uniform_scan_inclusive_ands(i16 signext 0) + %8 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 3 + store i16 %7, i16 addrspace(1)* %8, align 2 + %9 = tail call spir_func signext i16 @_Z39sub_group_non_uniform_scan_inclusive_ors(i16 signext 0) + %10 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 4 + store i16 %9, i16 addrspace(1)* %10, align 2 + %11 = tail call spir_func signext i16 @_Z40sub_group_non_uniform_scan_inclusive_xors(i16 signext 0) + %12 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 5 + store i16 %11, i16 addrspace(1)* %12, align 2 + %13 = tail call spir_func signext i16 @_Z40sub_group_non_uniform_scan_exclusive_ands(i16 signext 0) + %14 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 6 + store i16 %13, i16 addrspace(1)* %14, align 2 + %15 = tail call spir_func signext i16 @_Z39sub_group_non_uniform_scan_exclusive_ors(i16 signext 0) + %16 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 7 + store i16 %15, i16 addrspace(1)* %16, align 2 + %17 = tail call spir_func signext i16 @_Z40sub_group_non_uniform_scan_exclusive_xors(i16 signext 0) + %18 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 8 + store i16 %17, i16 addrspace(1)* %18, align 2 + ret void +} + +declare dso_local spir_func signext i16 @_Z32sub_group_non_uniform_reduce_ands(i16 signext) local_unnamed_addr + +declare dso_local spir_func signext i16 @_Z31sub_group_non_uniform_reduce_ors(i16 signext) local_unnamed_addr + +declare dso_local spir_func signext i16 @_Z32sub_group_non_uniform_reduce_xors(i16 signext) local_unnamed_addr + +declare dso_local spir_func signext i16 @_Z40sub_group_non_uniform_scan_inclusive_ands(i16 signext) local_unnamed_addr + +declare dso_local spir_func signext i16 @_Z39sub_group_non_uniform_scan_inclusive_ors(i16 signext) local_unnamed_addr + +declare dso_local spir_func signext i16 @_Z40sub_group_non_uniform_scan_inclusive_xors(i16 signext) local_unnamed_addr + +declare dso_local spir_func signext i16 
@_Z40sub_group_non_uniform_scan_exclusive_ands(i16 signext) local_unnamed_addr + +declare dso_local spir_func signext i16 @_Z39sub_group_non_uniform_scan_exclusive_ors(i16 signext) local_unnamed_addr + +declare dso_local spir_func signext i16 @_Z40sub_group_non_uniform_scan_exclusive_xors(i16 signext) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#short]] %[[#ScopeSubgroup]] Reduce %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#short]] %[[#ScopeSubgroup]] Reduce %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#short]] %[[#ScopeSubgroup]] Reduce %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#short]] %[[#ScopeSubgroup]] InclusiveScan %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#short]] %[[#ScopeSubgroup]] InclusiveScan %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#short]] %[[#ScopeSubgroup]] InclusiveScan %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#short]] %[[#ScopeSubgroup]] ExclusiveScan %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#short]] %[[#ScopeSubgroup]] ExclusiveScan %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#short]] %[[#ScopeSubgroup]] ExclusiveScan %[[#short_0]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testNonUniformBitwiseUShort(i16 addrspace(1)* nocapture) local_unnamed_addr { + %2 = tail call spir_func zeroext i16 @_Z32sub_group_non_uniform_reduce_andt(i16 zeroext 0) + store i16 %2, i16 addrspace(1)* %0, align 2 + %3 = tail call spir_func zeroext i16 @_Z31sub_group_non_uniform_reduce_ort(i16 zeroext 0) + %4 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 1 + store i16 %3, i16 addrspace(1)* %4, align 2 + %5 = tail call spir_func zeroext i16 @_Z32sub_group_non_uniform_reduce_xort(i16 zeroext 0) + %6 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 2 + store i16 %5, i16 addrspace(1)* %6, align 2 + %7 = tail call spir_func zeroext i16 @_Z40sub_group_non_uniform_scan_inclusive_andt(i16 zeroext 0) + %8 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 3 + store i16 %7, i16 addrspace(1)* %8, align 2 + %9 = tail call spir_func zeroext i16 @_Z39sub_group_non_uniform_scan_inclusive_ort(i16 zeroext 0) + %10 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 4 + store i16 %9, i16 addrspace(1)* %10, align 2 + %11 = tail call spir_func zeroext i16 @_Z40sub_group_non_uniform_scan_inclusive_xort(i16 zeroext 0) + %12 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 5 + store i16 %11, i16 addrspace(1)* %12, align 2 + %13 = tail call spir_func zeroext i16 @_Z40sub_group_non_uniform_scan_exclusive_andt(i16 zeroext 0) + %14 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 6 + store i16 %13, i16 addrspace(1)* %14, align 2 + %15 = tail call spir_func zeroext i16 @_Z39sub_group_non_uniform_scan_exclusive_ort(i16 zeroext 0) + %16 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 7 + store i16 %15, i16 addrspace(1)* %16, align 2 + %17 = tail call spir_func zeroext i16 @_Z40sub_group_non_uniform_scan_exclusive_xort(i16 zeroext 0) + %18 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 8 + store i16 %17, i16 addrspace(1)* %18, align 2 + ret void +} + +declare dso_local spir_func zeroext i16 @_Z32sub_group_non_uniform_reduce_andt(i16 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i16 @_Z31sub_group_non_uniform_reduce_ort(i16 zeroext) local_unnamed_addr + 
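+;; Note: "t" is the Itanium mangling of the OpenCL "ushort" argument, which is
+;; why these declarations carry "zeroext"; the signed "short" overloads above
+;; use "s" and "signext".
+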
+declare dso_local spir_func zeroext i16 @_Z32sub_group_non_uniform_reduce_xort(i16 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i16 @_Z40sub_group_non_uniform_scan_inclusive_andt(i16 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i16 @_Z39sub_group_non_uniform_scan_inclusive_ort(i16 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i16 @_Z40sub_group_non_uniform_scan_inclusive_xort(i16 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i16 @_Z40sub_group_non_uniform_scan_exclusive_andt(i16 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i16 @_Z39sub_group_non_uniform_scan_exclusive_ort(i16 zeroext) local_unnamed_addr + +declare dso_local spir_func zeroext i16 @_Z40sub_group_non_uniform_scan_exclusive_xort(i16 zeroext) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#int]] %[[#ScopeSubgroup]] Reduce %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#int]] %[[#ScopeSubgroup]] Reduce %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#int]] %[[#ScopeSubgroup]] Reduce %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#int]] %[[#ScopeSubgroup]] InclusiveScan %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#int]] %[[#ScopeSubgroup]] InclusiveScan %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#int]] %[[#ScopeSubgroup]] InclusiveScan %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#int]] %[[#ScopeSubgroup]] ExclusiveScan %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#int]] %[[#ScopeSubgroup]] ExclusiveScan %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#int]] %[[#ScopeSubgroup]] ExclusiveScan %[[#int_0]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testNonUniformBitwiseInt(i32 addrspace(1)* nocapture) local_unnamed_addr { + %2 = tail call spir_func i32 @_Z32sub_group_non_uniform_reduce_andi(i32 0) + store i32 %2, i32 addrspace(1)* %0, align 4 + %3 = tail call spir_func i32 @_Z31sub_group_non_uniform_reduce_ori(i32 0) + %4 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 1 + store i32 %3, i32 addrspace(1)* %4, align 4 + %5 = tail call spir_func i32 @_Z32sub_group_non_uniform_reduce_xori(i32 0) + %6 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 2 + store i32 %5, i32 addrspace(1)* %6, align 4 + %7 = tail call spir_func i32 @_Z40sub_group_non_uniform_scan_inclusive_andi(i32 0) + %8 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 3 + store i32 %7, i32 addrspace(1)* %8, align 4 + %9 = tail call spir_func i32 @_Z39sub_group_non_uniform_scan_inclusive_ori(i32 0) + %10 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 4 + store i32 %9, i32 addrspace(1)* %10, align 4 + %11 = tail call spir_func i32 @_Z40sub_group_non_uniform_scan_inclusive_xori(i32 0) + %12 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 5 + store i32 %11, i32 addrspace(1)* %12, align 4 + %13 = tail call spir_func i32 @_Z40sub_group_non_uniform_scan_exclusive_andi(i32 0) + %14 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 6 + store i32 %13, i32 addrspace(1)* %14, align 4 + %15 = tail call spir_func i32 @_Z39sub_group_non_uniform_scan_exclusive_ori(i32 0) + %16 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 7 + store i32 %15, i32 addrspace(1)* %16, align 4 + %17 = tail call spir_func i32 @_Z40sub_group_non_uniform_scan_exclusive_xori(i32 0) + %18 = getelementptr 
inbounds i32, i32 addrspace(1)* %0, i64 8 + store i32 %17, i32 addrspace(1)* %18, align 4 + ret void +} + +declare dso_local spir_func i32 @_Z32sub_group_non_uniform_reduce_andi(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z31sub_group_non_uniform_reduce_ori(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z32sub_group_non_uniform_reduce_xori(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z40sub_group_non_uniform_scan_inclusive_andi(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z39sub_group_non_uniform_scan_inclusive_ori(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z40sub_group_non_uniform_scan_inclusive_xori(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z40sub_group_non_uniform_scan_exclusive_andi(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z39sub_group_non_uniform_scan_exclusive_ori(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z40sub_group_non_uniform_scan_exclusive_xori(i32) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#int]] %[[#ScopeSubgroup]] Reduce %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#int]] %[[#ScopeSubgroup]] Reduce %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#int]] %[[#ScopeSubgroup]] Reduce %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#int]] %[[#ScopeSubgroup]] InclusiveScan %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#int]] %[[#ScopeSubgroup]] InclusiveScan %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#int]] %[[#ScopeSubgroup]] InclusiveScan %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#int]] %[[#ScopeSubgroup]] ExclusiveScan %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#int]] %[[#ScopeSubgroup]] ExclusiveScan %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#int]] %[[#ScopeSubgroup]] ExclusiveScan %[[#int_0]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testNonUniformBitwiseUInt(i32 addrspace(1)* nocapture) local_unnamed_addr { + %2 = tail call spir_func i32 @_Z32sub_group_non_uniform_reduce_andj(i32 0) + store i32 %2, i32 addrspace(1)* %0, align 4 + %3 = tail call spir_func i32 @_Z31sub_group_non_uniform_reduce_orj(i32 0) + %4 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 1 + store i32 %3, i32 addrspace(1)* %4, align 4 + %5 = tail call spir_func i32 @_Z32sub_group_non_uniform_reduce_xorj(i32 0) + %6 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 2 + store i32 %5, i32 addrspace(1)* %6, align 4 + %7 = tail call spir_func i32 @_Z40sub_group_non_uniform_scan_inclusive_andj(i32 0) + %8 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 3 + store i32 %7, i32 addrspace(1)* %8, align 4 + %9 = tail call spir_func i32 @_Z39sub_group_non_uniform_scan_inclusive_orj(i32 0) + %10 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 4 + store i32 %9, i32 addrspace(1)* %10, align 4 + %11 = tail call spir_func i32 @_Z40sub_group_non_uniform_scan_inclusive_xorj(i32 0) + %12 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 5 + store i32 %11, i32 addrspace(1)* %12, align 4 + %13 = tail call spir_func i32 @_Z40sub_group_non_uniform_scan_exclusive_andj(i32 0) + %14 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 6 + store i32 %13, i32 addrspace(1)* %14, align 4 + %15 = tail call spir_func i32 @_Z39sub_group_non_uniform_scan_exclusive_orj(i32 0) + %16 = getelementptr 
inbounds i32, i32 addrspace(1)* %0, i64 7 + store i32 %15, i32 addrspace(1)* %16, align 4 + %17 = tail call spir_func i32 @_Z40sub_group_non_uniform_scan_exclusive_xorj(i32 0) + %18 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 8 + store i32 %17, i32 addrspace(1)* %18, align 4 + ret void +} + +declare dso_local spir_func i32 @_Z32sub_group_non_uniform_reduce_andj(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z31sub_group_non_uniform_reduce_orj(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z32sub_group_non_uniform_reduce_xorj(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z40sub_group_non_uniform_scan_inclusive_andj(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z39sub_group_non_uniform_scan_inclusive_orj(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z40sub_group_non_uniform_scan_inclusive_xorj(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z40sub_group_non_uniform_scan_exclusive_andj(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z39sub_group_non_uniform_scan_exclusive_orj(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z40sub_group_non_uniform_scan_exclusive_xorj(i32) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#long]] %[[#ScopeSubgroup]] Reduce %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#long]] %[[#ScopeSubgroup]] Reduce %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#long]] %[[#ScopeSubgroup]] Reduce %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#long]] %[[#ScopeSubgroup]] InclusiveScan %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#long]] %[[#ScopeSubgroup]] InclusiveScan %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#long]] %[[#ScopeSubgroup]] InclusiveScan %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#long]] %[[#ScopeSubgroup]] ExclusiveScan %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#long]] %[[#ScopeSubgroup]] ExclusiveScan %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#long]] %[[#ScopeSubgroup]] ExclusiveScan %[[#long_0]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testNonUniformBitwiseLong(i64 addrspace(1)* nocapture) local_unnamed_addr { + %2 = tail call spir_func i64 @_Z32sub_group_non_uniform_reduce_andl(i64 0) + store i64 %2, i64 addrspace(1)* %0, align 8 + %3 = tail call spir_func i64 @_Z31sub_group_non_uniform_reduce_orl(i64 0) + %4 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 1 + store i64 %3, i64 addrspace(1)* %4, align 8 + %5 = tail call spir_func i64 @_Z32sub_group_non_uniform_reduce_xorl(i64 0) + %6 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 2 + store i64 %5, i64 addrspace(1)* %6, align 8 + %7 = tail call spir_func i64 @_Z40sub_group_non_uniform_scan_inclusive_andl(i64 0) + %8 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 3 + store i64 %7, i64 addrspace(1)* %8, align 8 + %9 = tail call spir_func i64 @_Z39sub_group_non_uniform_scan_inclusive_orl(i64 0) + %10 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 4 + store i64 %9, i64 addrspace(1)* %10, align 8 + %11 = tail call spir_func i64 @_Z40sub_group_non_uniform_scan_inclusive_xorl(i64 0) + %12 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 5 + store i64 %11, i64 addrspace(1)* %12, align 8 + %13 = tail call spir_func i64 @_Z40sub_group_non_uniform_scan_exclusive_andl(i64 0) + %14 = 
getelementptr inbounds i64, i64 addrspace(1)* %0, i64 6 + store i64 %13, i64 addrspace(1)* %14, align 8 + %15 = tail call spir_func i64 @_Z39sub_group_non_uniform_scan_exclusive_orl(i64 0) + %16 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 7 + store i64 %15, i64 addrspace(1)* %16, align 8 + %17 = tail call spir_func i64 @_Z40sub_group_non_uniform_scan_exclusive_xorl(i64 0) + %18 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 8 + store i64 %17, i64 addrspace(1)* %18, align 8 + ret void +} + +declare dso_local spir_func i64 @_Z32sub_group_non_uniform_reduce_andl(i64) local_unnamed_addr + +declare dso_local spir_func i64 @_Z31sub_group_non_uniform_reduce_orl(i64) local_unnamed_addr + +declare dso_local spir_func i64 @_Z32sub_group_non_uniform_reduce_xorl(i64) local_unnamed_addr + +declare dso_local spir_func i64 @_Z40sub_group_non_uniform_scan_inclusive_andl(i64) local_unnamed_addr + +declare dso_local spir_func i64 @_Z39sub_group_non_uniform_scan_inclusive_orl(i64) local_unnamed_addr + +declare dso_local spir_func i64 @_Z40sub_group_non_uniform_scan_inclusive_xorl(i64) local_unnamed_addr + +declare dso_local spir_func i64 @_Z40sub_group_non_uniform_scan_exclusive_andl(i64) local_unnamed_addr + +declare dso_local spir_func i64 @_Z39sub_group_non_uniform_scan_exclusive_orl(i64) local_unnamed_addr + +declare dso_local spir_func i64 @_Z40sub_group_non_uniform_scan_exclusive_xorl(i64) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#long]] %[[#ScopeSubgroup]] Reduce %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#long]] %[[#ScopeSubgroup]] Reduce %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#long]] %[[#ScopeSubgroup]] Reduce %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#long]] %[[#ScopeSubgroup]] InclusiveScan %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#long]] %[[#ScopeSubgroup]] InclusiveScan %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#long]] %[[#ScopeSubgroup]] InclusiveScan %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#long]] %[[#ScopeSubgroup]] ExclusiveScan %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#long]] %[[#ScopeSubgroup]] ExclusiveScan %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#long]] %[[#ScopeSubgroup]] ExclusiveScan %[[#long_0]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testNonUniformBitwiseULong(i64 addrspace(1)* nocapture) local_unnamed_addr { + %2 = tail call spir_func i64 @_Z32sub_group_non_uniform_reduce_andm(i64 0) + store i64 %2, i64 addrspace(1)* %0, align 8 + %3 = tail call spir_func i64 @_Z31sub_group_non_uniform_reduce_orm(i64 0) + %4 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 1 + store i64 %3, i64 addrspace(1)* %4, align 8 + %5 = tail call spir_func i64 @_Z32sub_group_non_uniform_reduce_xorm(i64 0) + %6 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 2 + store i64 %5, i64 addrspace(1)* %6, align 8 + %7 = tail call spir_func i64 @_Z40sub_group_non_uniform_scan_inclusive_andm(i64 0) + %8 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 3 + store i64 %7, i64 addrspace(1)* %8, align 8 + %9 = tail call spir_func i64 @_Z39sub_group_non_uniform_scan_inclusive_orm(i64 0) + %10 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 4 + store i64 %9, i64 addrspace(1)* %10, align 8 + %11 = tail call spir_func i64 @_Z40sub_group_non_uniform_scan_inclusive_xorm(i64 
0) + %12 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 5 + store i64 %11, i64 addrspace(1)* %12, align 8 + %13 = tail call spir_func i64 @_Z40sub_group_non_uniform_scan_exclusive_andm(i64 0) + %14 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 6 + store i64 %13, i64 addrspace(1)* %14, align 8 + %15 = tail call spir_func i64 @_Z39sub_group_non_uniform_scan_exclusive_orm(i64 0) + %16 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 7 + store i64 %15, i64 addrspace(1)* %16, align 8 + %17 = tail call spir_func i64 @_Z40sub_group_non_uniform_scan_exclusive_xorm(i64 0) + %18 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 8 + store i64 %17, i64 addrspace(1)* %18, align 8 + ret void +} + +declare dso_local spir_func i64 @_Z32sub_group_non_uniform_reduce_andm(i64) local_unnamed_addr + +declare dso_local spir_func i64 @_Z31sub_group_non_uniform_reduce_orm(i64) local_unnamed_addr + +declare dso_local spir_func i64 @_Z32sub_group_non_uniform_reduce_xorm(i64) local_unnamed_addr + +declare dso_local spir_func i64 @_Z40sub_group_non_uniform_scan_inclusive_andm(i64) local_unnamed_addr + +declare dso_local spir_func i64 @_Z39sub_group_non_uniform_scan_inclusive_orm(i64) local_unnamed_addr + +declare dso_local spir_func i64 @_Z40sub_group_non_uniform_scan_inclusive_xorm(i64) local_unnamed_addr + +declare dso_local spir_func i64 @_Z40sub_group_non_uniform_scan_exclusive_andm(i64) local_unnamed_addr + +declare dso_local spir_func i64 @_Z39sub_group_non_uniform_scan_exclusive_orm(i64) local_unnamed_addr + +declare dso_local spir_func i64 @_Z40sub_group_non_uniform_scan_exclusive_xorm(i64) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformLogicalAnd %[[#bool]] %[[#ScopeSubgroup]] Reduce %[[#false]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformLogicalOr %[[#bool]] %[[#ScopeSubgroup]] Reduce %[[#false]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformLogicalXor %[[#bool]] %[[#ScopeSubgroup]] Reduce %[[#false]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformLogicalAnd %[[#bool]] %[[#ScopeSubgroup]] InclusiveScan %[[#false]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformLogicalOr %[[#bool]] %[[#ScopeSubgroup]] InclusiveScan %[[#false]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformLogicalXor %[[#bool]] %[[#ScopeSubgroup]] InclusiveScan %[[#false]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformLogicalAnd %[[#bool]] %[[#ScopeSubgroup]] ExclusiveScan %[[#false]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformLogicalOr %[[#bool]] %[[#ScopeSubgroup]] ExclusiveScan %[[#false]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformLogicalXor %[[#bool]] %[[#ScopeSubgroup]] ExclusiveScan %[[#false]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testNonUniformLogical(i32 addrspace(1)* nocapture) local_unnamed_addr { + %2 = tail call spir_func i32 @_Z40sub_group_non_uniform_reduce_logical_andi(i32 0) + store i32 %2, i32 addrspace(1)* %0, align 4 + %3 = tail call spir_func i32 @_Z39sub_group_non_uniform_reduce_logical_ori(i32 0) + %4 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 1 + store i32 %3, i32 addrspace(1)* %4, align 4 + %5 = tail call spir_func i32 @_Z40sub_group_non_uniform_reduce_logical_xori(i32 0) + %6 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 2 + store i32 %5, i32 addrspace(1)* %6, align 4 + %7 = tail call spir_func i32 @_Z48sub_group_non_uniform_scan_inclusive_logical_andi(i32 0) + %8 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 3 + store i32 %7, i32 addrspace(1)* %8, align 4 + %9 = tail call spir_func i32 
@_Z47sub_group_non_uniform_scan_inclusive_logical_ori(i32 0) + %10 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 4 + store i32 %9, i32 addrspace(1)* %10, align 4 + %11 = tail call spir_func i32 @_Z48sub_group_non_uniform_scan_inclusive_logical_xori(i32 0) + %12 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 5 + store i32 %11, i32 addrspace(1)* %12, align 4 + %13 = tail call spir_func i32 @_Z48sub_group_non_uniform_scan_exclusive_logical_andi(i32 0) + %14 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 6 + store i32 %13, i32 addrspace(1)* %14, align 4 + %15 = tail call spir_func i32 @_Z47sub_group_non_uniform_scan_exclusive_logical_ori(i32 0) + %16 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 7 + store i32 %15, i32 addrspace(1)* %16, align 4 + %17 = tail call spir_func i32 @_Z48sub_group_non_uniform_scan_exclusive_logical_xori(i32 0) + %18 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 8 + store i32 %17, i32 addrspace(1)* %18, align 4 + ret void +} + +declare dso_local spir_func i32 @_Z40sub_group_non_uniform_reduce_logical_andi(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z39sub_group_non_uniform_reduce_logical_ori(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z40sub_group_non_uniform_reduce_logical_xori(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z48sub_group_non_uniform_scan_inclusive_logical_andi(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z47sub_group_non_uniform_scan_inclusive_logical_ori(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z48sub_group_non_uniform_scan_inclusive_logical_xori(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z48sub_group_non_uniform_scan_exclusive_logical_andi(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z47sub_group_non_uniform_scan_exclusive_logical_ori(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z48sub_group_non_uniform_scan_exclusive_logical_xori(i32) local_unnamed_addr diff --git a/llvm/test/CodeGen/SPIRV/transcoding/sub_group_non_uniform_vote.ll b/llvm/test/CodeGen/SPIRV/transcoding/sub_group_non_uniform_vote.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/transcoding/sub_group_non_uniform_vote.ll @@ -0,0 +1,183 @@ +;; #pragma OPENCL EXTENSION cl_khr_subgroup_non_uniform_vote : enable +;; +;; kernel void testSubGroupElect(global int* dst) { +;; dst[0] = sub_group_elect(); +;; } +;; +;; kernel void testSubGroupNonUniformAll(global int* dst) { +;; dst[0] = sub_group_non_uniform_all(0); +;; } +;; +;; kernel void testSubGroupNonUniformAny(global int* dst) { +;; dst[0] = sub_group_non_uniform_any(0); +;; } +;; +;; #pragma OPENCL EXTENSION cl_khr_fp16 : enable +;; #pragma OPENCL EXTENSION cl_khr_fp64 : enable +;; kernel void testSubGroupNonUniformAllEqual(global int* dst) { +;; { +;; char v = 0; +;; dst[0] = sub_group_non_uniform_all_equal( v ); +;; } +;; { +;; uchar v = 0; +;; dst[0] = sub_group_non_uniform_all_equal( v ); +;; } +;; { +;; short v = 0; +;; dst[0] = sub_group_non_uniform_all_equal( v ); +;; } +;; { +;; ushort v = 0; +;; dst[0] = sub_group_non_uniform_all_equal( v ); +;; } +;; { +;; int v = 0; +;; dst[0] = sub_group_non_uniform_all_equal( v ); +;; } +;; { +;; uint v = 0; +;; dst[0] = sub_group_non_uniform_all_equal( v ); +;; } +;; { +;; long v = 0; +;; dst[0] = sub_group_non_uniform_all_equal( v ); +;; } +;; { +;; ulong v = 0; +;; dst[0] = sub_group_non_uniform_all_equal( v ); +;; } +;; { +;; float v = 0; +;; dst[0] = sub_group_non_uniform_all_equal( v ); 
+;; } +;; { +;; half v = 0; +;; dst[0] = sub_group_non_uniform_all_equal( v ); +;; } +;; { +;; double v = 0; +;; dst[0] = sub_group_non_uniform_all_equal( v ); +;; } +;; } + +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV + +; CHECK-SPIRV-DAG: OpCapability GroupNonUniformVote + +; CHECK-SPIRV-DAG: %[[#bool:]] = OpTypeBool +; CHECK-SPIRV-DAG: %[[#char:]] = OpTypeInt 8 0 +; CHECK-SPIRV-DAG: %[[#short:]] = OpTypeInt 16 0 +; CHECK-SPIRV-DAG: %[[#int:]] = OpTypeInt 32 0 +; CHECK-SPIRV-DAG: %[[#long:]] = OpTypeInt 64 0 +; CHECK-SPIRV-DAG: %[[#half:]] = OpTypeFloat 16 +; CHECK-SPIRV-DAG: %[[#float:]] = OpTypeFloat 32 +; CHECK-SPIRV-DAG: %[[#double:]] = OpTypeFloat 64 + +; CHECK-SPIRV-DAG: %[[#false:]] = OpConstantFalse %[[#bool]] +; CHECK-SPIRV-DAG: %[[#ScopeSubgroup:]] = OpConstant %[[#int]] 3 +; CHECK-SPIRV-DAG: %[[#char_0:]] = OpConstant %[[#char]] 0 +; CHECK-SPIRV-DAG: %[[#short_0:]] = OpConstant %[[#short]] 0 +; CHECK-SPIRV-DAG: %[[#int_0:]] = OpConstant %[[#int]] 0 +; CHECK-SPIRV-DAG: %[[#long_0:]] = OpConstantNull %[[#long]] +; CHECK-SPIRV-DAG: %[[#half_0:]] = OpConstant %[[#half]] 0 +; CHECK-SPIRV-DAG: %[[#float_0:]] = OpConstant %[[#float]] 0 +; CHECK-SPIRV-DAG: %[[#double_0:]] = OpConstant %[[#double]] 0 + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformElect %[[#bool]] %[[#ScopeSubgroup]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testSubGroupElect(i32 addrspace(1)* nocapture) local_unnamed_addr { + %2 = tail call spir_func i32 @_Z15sub_group_electv() + store i32 %2, i32 addrspace(1)* %0, align 4 + ret void +} + +declare dso_local spir_func i32 @_Z15sub_group_electv() local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformAll %[[#bool]] %[[#ScopeSubgroup]] %[[#false]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testSubGroupNonUniformAll(i32 addrspace(1)* nocapture) local_unnamed_addr { + %2 = tail call spir_func i32 @_Z25sub_group_non_uniform_alli(i32 0) + store i32 %2, i32 addrspace(1)* %0, align 4 + ret void +} + +declare dso_local spir_func i32 @_Z25sub_group_non_uniform_alli(i32) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformAny %[[#bool]] %[[#ScopeSubgroup]] %[[#false]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testSubGroupNonUniformAny(i32 addrspace(1)* nocapture) local_unnamed_addr { + %2 = tail call spir_func i32 @_Z25sub_group_non_uniform_anyi(i32 0) + store i32 %2, i32 addrspace(1)* %0, align 4 + ret void +} + +declare dso_local spir_func i32 @_Z25sub_group_non_uniform_anyi(i32) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformAllEqual %[[#bool]] %[[#ScopeSubgroup]] %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformAllEqual %[[#bool]] %[[#ScopeSubgroup]] %[[#char_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformAllEqual %[[#bool]] %[[#ScopeSubgroup]] %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformAllEqual %[[#bool]] %[[#ScopeSubgroup]] %[[#short_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformAllEqual %[[#bool]] %[[#ScopeSubgroup]] %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformAllEqual %[[#bool]] %[[#ScopeSubgroup]] %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformAllEqual %[[#bool]] %[[#ScopeSubgroup]] %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformAllEqual %[[#bool]] %[[#ScopeSubgroup]] %[[#long_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformAllEqual %[[#bool]] %[[#ScopeSubgroup]] 
%[[#float_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformAllEqual %[[#bool]] %[[#ScopeSubgroup]] %[[#half_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformAllEqual %[[#bool]] %[[#ScopeSubgroup]] %[[#double_0]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testSubGroupNonUniformAllEqual(i32 addrspace(1)* nocapture) local_unnamed_addr { + %2 = tail call spir_func i32 @_Z31sub_group_non_uniform_all_equalc(i8 signext 0) + store i32 %2, i32 addrspace(1)* %0, align 4 + %3 = tail call spir_func i32 @_Z31sub_group_non_uniform_all_equalh(i8 zeroext 0) + store i32 %3, i32 addrspace(1)* %0, align 4 + %4 = tail call spir_func i32 @_Z31sub_group_non_uniform_all_equals(i16 signext 0) + store i32 %4, i32 addrspace(1)* %0, align 4 + %5 = tail call spir_func i32 @_Z31sub_group_non_uniform_all_equalt(i16 zeroext 0) + store i32 %5, i32 addrspace(1)* %0, align 4 + %6 = tail call spir_func i32 @_Z31sub_group_non_uniform_all_equali(i32 0) + store i32 %6, i32 addrspace(1)* %0, align 4 + %7 = tail call spir_func i32 @_Z31sub_group_non_uniform_all_equalj(i32 0) + store i32 %7, i32 addrspace(1)* %0, align 4 + %8 = tail call spir_func i32 @_Z31sub_group_non_uniform_all_equall(i64 0) + store i32 %8, i32 addrspace(1)* %0, align 4 + %9 = tail call spir_func i32 @_Z31sub_group_non_uniform_all_equalm(i64 0) + store i32 %9, i32 addrspace(1)* %0, align 4 + %10 = tail call spir_func i32 @_Z31sub_group_non_uniform_all_equalf(float 0.000000e+00) + store i32 %10, i32 addrspace(1)* %0, align 4 + %11 = tail call spir_func i32 @_Z31sub_group_non_uniform_all_equalDh(half 0xH0000) + store i32 %11, i32 addrspace(1)* %0, align 4 + %12 = tail call spir_func i32 @_Z31sub_group_non_uniform_all_equald(double 0.000000e+00) + store i32 %12, i32 addrspace(1)* %0, align 4 + ret void +} + +declare dso_local spir_func i32 @_Z31sub_group_non_uniform_all_equalc(i8 signext) local_unnamed_addr + +declare dso_local spir_func i32 @_Z31sub_group_non_uniform_all_equalh(i8 zeroext) local_unnamed_addr + +declare dso_local spir_func i32 @_Z31sub_group_non_uniform_all_equals(i16 signext) local_unnamed_addr + +declare dso_local spir_func i32 @_Z31sub_group_non_uniform_all_equalt(i16 zeroext) local_unnamed_addr + +declare dso_local spir_func i32 @_Z31sub_group_non_uniform_all_equali(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z31sub_group_non_uniform_all_equalj(i32) local_unnamed_addr + +declare dso_local spir_func i32 @_Z31sub_group_non_uniform_all_equall(i64) local_unnamed_addr + +declare dso_local spir_func i32 @_Z31sub_group_non_uniform_all_equalm(i64) local_unnamed_addr + +declare dso_local spir_func i32 @_Z31sub_group_non_uniform_all_equalf(float) local_unnamed_addr + +declare dso_local spir_func i32 @_Z31sub_group_non_uniform_all_equalDh(half) local_unnamed_addr + +declare dso_local spir_func i32 @_Z31sub_group_non_uniform_all_equald(double) local_unnamed_addr diff --git a/llvm/test/CodeGen/SPIRV/transcoding/sub_group_shuffle.ll b/llvm/test/CodeGen/SPIRV/transcoding/sub_group_shuffle.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/transcoding/sub_group_shuffle.ll @@ -0,0 +1,299 @@ +;; #pragma OPENCL EXTENSION cl_khr_subgroup_shuffle : enable +;; #pragma OPENCL EXTENSION cl_khr_fp16 : enable +;; #pragma OPENCL EXTENSION cl_khr_fp64 : enable +;; +;; kernel void testShuffleChar(global char* dst) +;; { +;; char v = 0; +;; dst[0] = sub_group_shuffle( v, 0 ); +;; dst[1] = sub_group_shuffle_xor( v, 0 ); +;; } +;; +;; kernel void testShuffleUChar(global uchar* dst) +;; { +;; uchar v = 0; +;; dst[0] = 
sub_group_shuffle( v, 0 ); +;; dst[1] = sub_group_shuffle_xor( v, 0 ); +;; } +;; +;; kernel void testShuffleShort(global short* dst) +;; { +;; short v = 0; +;; dst[0] = sub_group_shuffle( v, 0 ); +;; dst[1] = sub_group_shuffle_xor( v, 0 ); +;; } +;; +;; kernel void testShuffleUShort(global ushort* dst) +;; { +;; ushort v = 0; +;; dst[0] = sub_group_shuffle( v, 0 ); +;; dst[1] = sub_group_shuffle_xor( v, 0 ); +;; } +;; +;; kernel void testShuffleInt(global int* dst) +;; { +;; int v = 0; +;; dst[0] = sub_group_shuffle( v, 0 ); +;; dst[1] = sub_group_shuffle_xor( v, 0 ); +;; } +;; +;; kernel void testShuffleUInt(global uint* dst) +;; { +;; uint v = 0; +;; dst[0] = sub_group_shuffle( v, 0 ); +;; dst[1] = sub_group_shuffle_xor( v, 0 ); +;; } +;; +;; kernel void testShuffleLong(global long* dst) +;; { +;; long v = 0; +;; dst[0] = sub_group_shuffle( v, 0 ); +;; dst[1] = sub_group_shuffle_xor( v, 0 ); +;; } +;; +;; kernel void testShuffleULong(global ulong* dst) +;; { +;; ulong v = 0; +;; dst[0] = sub_group_shuffle( v, 0 ); +;; dst[1] = sub_group_shuffle_xor( v, 0 ); +;; } +;; +;; kernel void testShuffleFloat(global float* dst) +;; { +;; float v = 0; +;; dst[0] = sub_group_shuffle( v, 0 ); +;; dst[1] = sub_group_shuffle_xor( v, 0 ); +;; } +;; +;; kernel void testShuffleHalf(global half* dst) +;; { +;; half v = 0; +;; dst[0] = sub_group_shuffle( v, 0 ); +;; dst[1] = sub_group_shuffle_xor( v, 0 ); +;; } +;; +;; kernel void testShuffleDouble(global double* dst) +;; { +;; double v = 0; +;; dst[0] = sub_group_shuffle( v, 0 ); +;; dst[1] = sub_group_shuffle_xor( v, 0 ); +;; } + +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV + +; CHECK-SPIRV-DAG: OpCapability GroupNonUniformShuffle + +; CHECK-SPIRV-DAG: %[[#char:]] = OpTypeInt 8 0 +; CHECK-SPIRV-DAG: %[[#short:]] = OpTypeInt 16 0 +; CHECK-SPIRV-DAG: %[[#int:]] = OpTypeInt 32 0 +; CHECK-SPIRV-DAG: %[[#long:]] = OpTypeInt 64 0 +; CHECK-SPIRV-DAG: %[[#half:]] = OpTypeFloat 16 +; CHECK-SPIRV-DAG: %[[#float:]] = OpTypeFloat 32 +; CHECK-SPIRV-DAG: %[[#double:]] = OpTypeFloat 64 + +; CHECK-SPIRV-DAG: %[[#ScopeSubgroup:]] = OpConstant %[[#int]] 3 +; CHECK-SPIRV-DAG: %[[#char_0:]] = OpConstant %[[#char]] 0 +; CHECK-SPIRV-DAG: %[[#short_0:]] = OpConstant %[[#short]] 0 +; CHECK-SPIRV-DAG: %[[#int_0:]] = OpConstant %[[#int]] 0 +; CHECK-SPIRV-DAG: %[[#long_0:]] = OpConstantNull %[[#long]] +; CHECK-SPIRV-DAG: %[[#half_0:]] = OpConstant %[[#half]] 0 +; CHECK-SPIRV-DAG: %[[#float_0:]] = OpConstant %[[#float]] 0 +; CHECK-SPIRV-DAG: %[[#double_0:]] = OpConstant %[[#double]] 0 + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffle %[[#char]] %[[#ScopeSubgroup]] %[[#char_0]] %[[#int_0]] +; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleXor %[[#char]] %[[#ScopeSubgroup]] %[[#char_0]] %[[#int_0]] +; CHECK-SPIRV: OpFunctionEnd + +define dso_local spir_kernel void @testShuffleChar(i8 addrspace(1)* nocapture) local_unnamed_addr { + %2 = tail call spir_func signext i8 @_Z17sub_group_shufflecj(i8 signext 0, i32 0) + store i8 %2, i8 addrspace(1)* %0, align 1 + %3 = tail call spir_func signext i8 @_Z21sub_group_shuffle_xorcj(i8 signext 0, i32 0) + %4 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 1 + store i8 %3, i8 addrspace(1)* %4, align 1 + ret void +} + +declare dso_local spir_func signext i8 @_Z17sub_group_shufflecj(i8 signext, i32) local_unnamed_addr + +declare dso_local spir_func signext i8 @_Z21sub_group_shuffle_xorcj(i8 signext, i32) local_unnamed_addr + +; CHECK-SPIRV: OpFunction +; 
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffle %[[#char]] %[[#ScopeSubgroup]] %[[#char_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleXor %[[#char]] %[[#ScopeSubgroup]] %[[#char_0]] %[[#int_0]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testShuffleChar(i8 addrspace(1)* nocapture) local_unnamed_addr {
+  %2 = tail call spir_func signext i8 @_Z17sub_group_shufflecj(i8 signext 0, i32 0)
+  store i8 %2, i8 addrspace(1)* %0, align 1
+  %3 = tail call spir_func signext i8 @_Z21sub_group_shuffle_xorcj(i8 signext 0, i32 0)
+  %4 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 1
+  store i8 %3, i8 addrspace(1)* %4, align 1
+  ret void
+}
+
+declare dso_local spir_func signext i8 @_Z17sub_group_shufflecj(i8 signext, i32) local_unnamed_addr
+
+declare dso_local spir_func signext i8 @_Z21sub_group_shuffle_xorcj(i8 signext, i32) local_unnamed_addr
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffle %[[#char]] %[[#ScopeSubgroup]] %[[#char_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleXor %[[#char]] %[[#ScopeSubgroup]] %[[#char_0]] %[[#int_0]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testShuffleUChar(i8 addrspace(1)* nocapture) local_unnamed_addr {
+  %2 = tail call spir_func zeroext i8 @_Z17sub_group_shufflehj(i8 zeroext 0, i32 0)
+  store i8 %2, i8 addrspace(1)* %0, align 1
+  %3 = tail call spir_func zeroext i8 @_Z21sub_group_shuffle_xorhj(i8 zeroext 0, i32 0)
+  %4 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 1
+  store i8 %3, i8 addrspace(1)* %4, align 1
+  ret void
+}
+
+declare dso_local spir_func zeroext i8 @_Z17sub_group_shufflehj(i8 zeroext, i32) local_unnamed_addr
+
+declare dso_local spir_func zeroext i8 @_Z21sub_group_shuffle_xorhj(i8 zeroext, i32) local_unnamed_addr
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffle %[[#short]] %[[#ScopeSubgroup]] %[[#short_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleXor %[[#short]] %[[#ScopeSubgroup]] %[[#short_0]] %[[#int_0]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testShuffleShort(i16 addrspace(1)* nocapture) local_unnamed_addr {
+  %2 = tail call spir_func signext i16 @_Z17sub_group_shufflesj(i16 signext 0, i32 0)
+  store i16 %2, i16 addrspace(1)* %0, align 2
+  %3 = tail call spir_func signext i16 @_Z21sub_group_shuffle_xorsj(i16 signext 0, i32 0)
+  %4 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 1
+  store i16 %3, i16 addrspace(1)* %4, align 2
+  ret void
+}
+
+declare dso_local spir_func signext i16 @_Z17sub_group_shufflesj(i16 signext, i32) local_unnamed_addr
+
+declare dso_local spir_func signext i16 @_Z21sub_group_shuffle_xorsj(i16 signext, i32) local_unnamed_addr
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffle %[[#short]] %[[#ScopeSubgroup]] %[[#short_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleXor %[[#short]] %[[#ScopeSubgroup]] %[[#short_0]] %[[#int_0]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testShuffleUShort(i16 addrspace(1)* nocapture) local_unnamed_addr {
+  %2 = tail call spir_func zeroext i16 @_Z17sub_group_shuffletj(i16 zeroext 0, i32 0)
+  store i16 %2, i16 addrspace(1)* %0, align 2
+  %3 = tail call spir_func zeroext i16 @_Z21sub_group_shuffle_xortj(i16 zeroext 0, i32 0)
+  %4 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 1
+  store i16 %3, i16 addrspace(1)* %4, align 2
+  ret void
+}
+
+declare dso_local spir_func zeroext i16 @_Z17sub_group_shuffletj(i16 zeroext, i32) local_unnamed_addr
+
+declare dso_local spir_func zeroext i16 @_Z21sub_group_shuffle_xortj(i16 zeroext, i32) local_unnamed_addr
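The signed and unsigned variants of each width intentionally share one set of CHECK lines: kernel-flavoured SPIR-V requires OpTypeInt to carry a signedness operand of 0, so char and uchar (and likewise short/ushort, int/uint, long/ulong) resolve to the same type id. Only the Itanium mangling and the signext/zeroext attributes tell the pairs apart at the IR level; for example, both of these declarations map onto the single %[[#char]] = OpTypeInt 8 0 type:

declare dso_local spir_func signext i8 @_Z17sub_group_shufflecj(i8 signext, i32) local_unnamed_addr
declare dso_local spir_func zeroext i8 @_Z17sub_group_shufflehj(i8 zeroext, i32) local_unnamed_addr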
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffle %[[#int]] %[[#ScopeSubgroup]] %[[#int_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleXor %[[#int]] %[[#ScopeSubgroup]] %[[#int_0]] %[[#int_0]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testShuffleInt(i32 addrspace(1)* nocapture) local_unnamed_addr {
+  %2 = tail call spir_func i32 @_Z17sub_group_shuffleij(i32 0, i32 0)
+  store i32 %2, i32 addrspace(1)* %0, align 4
+  %3 = tail call spir_func i32 @_Z21sub_group_shuffle_xorij(i32 0, i32 0)
+  %4 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 1
+  store i32 %3, i32 addrspace(1)* %4, align 4
+  ret void
+}
+
+declare dso_local spir_func i32 @_Z17sub_group_shuffleij(i32, i32) local_unnamed_addr
+
+declare dso_local spir_func i32 @_Z21sub_group_shuffle_xorij(i32, i32) local_unnamed_addr
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffle %[[#int]] %[[#ScopeSubgroup]] %[[#int_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleXor %[[#int]] %[[#ScopeSubgroup]] %[[#int_0]] %[[#int_0]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testShuffleUInt(i32 addrspace(1)* nocapture) local_unnamed_addr {
+  %2 = tail call spir_func i32 @_Z17sub_group_shufflejj(i32 0, i32 0)
+  store i32 %2, i32 addrspace(1)* %0, align 4
+  %3 = tail call spir_func i32 @_Z21sub_group_shuffle_xorjj(i32 0, i32 0)
+  %4 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 1
+  store i32 %3, i32 addrspace(1)* %4, align 4
+  ret void
+}
+
+declare dso_local spir_func i32 @_Z17sub_group_shufflejj(i32, i32) local_unnamed_addr
+
+declare dso_local spir_func i32 @_Z21sub_group_shuffle_xorjj(i32, i32) local_unnamed_addr
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffle %[[#long]] %[[#ScopeSubgroup]] %[[#long_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleXor %[[#long]] %[[#ScopeSubgroup]] %[[#long_0]] %[[#int_0]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testShuffleLong(i64 addrspace(1)* nocapture) local_unnamed_addr {
+  %2 = tail call spir_func i64 @_Z17sub_group_shufflelj(i64 0, i32 0)
+  store i64 %2, i64 addrspace(1)* %0, align 8
+  %3 = tail call spir_func i64 @_Z21sub_group_shuffle_xorlj(i64 0, i32 0)
+  %4 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 1
+  store i64 %3, i64 addrspace(1)* %4, align 8
+  ret void
+}
+
+declare dso_local spir_func i64 @_Z17sub_group_shufflelj(i64, i32) local_unnamed_addr
+
+declare dso_local spir_func i64 @_Z21sub_group_shuffle_xorlj(i64, i32) local_unnamed_addr
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffle %[[#long]] %[[#ScopeSubgroup]] %[[#long_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleXor %[[#long]] %[[#ScopeSubgroup]] %[[#long_0]] %[[#int_0]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testShuffleULong(i64 addrspace(1)* nocapture) local_unnamed_addr {
+  %2 = tail call spir_func i64 @_Z17sub_group_shufflemj(i64 0, i32 0)
+  store i64 %2, i64 addrspace(1)* %0, align 8
+  %3 = tail call spir_func i64 @_Z21sub_group_shuffle_xormj(i64 0, i32 0)
+  %4 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 1
+  store i64 %3, i64 addrspace(1)* %4, align 8
+  ret void
+}
+
+declare dso_local spir_func i64 @_Z17sub_group_shufflemj(i64, i32) local_unnamed_addr
+
+declare dso_local spir_func i64 @_Z21sub_group_shuffle_xormj(i64, i32) local_unnamed_addr
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffle %[[#float]] %[[#ScopeSubgroup]] %[[#float_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleXor %[[#float]] %[[#ScopeSubgroup]] %[[#float_0]] %[[#int_0]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testShuffleFloat(float addrspace(1)* nocapture) local_unnamed_addr {
+  %2 = tail call spir_func float @_Z17sub_group_shufflefj(float 0.000000e+00, i32 0)
+  store float %2, float addrspace(1)* %0, align 4
+  %3 = tail call spir_func float @_Z21sub_group_shuffle_xorfj(float 0.000000e+00, i32 0)
+  %4 = getelementptr inbounds float, float addrspace(1)* %0, i64 1
+  store float %3, float addrspace(1)* %4, align 4
+  ret void
+}
+
+declare dso_local spir_func float @_Z17sub_group_shufflefj(float, i32) local_unnamed_addr
+
+declare dso_local spir_func float @_Z21sub_group_shuffle_xorfj(float, i32) local_unnamed_addr
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffle %[[#half]] %[[#ScopeSubgroup]] %[[#half_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleXor %[[#half]] %[[#ScopeSubgroup]] %[[#half_0]] %[[#int_0]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testShuffleHalf(half addrspace(1)* nocapture) local_unnamed_addr {
+  %2 = tail call spir_func half @_Z17sub_group_shuffleDhj(half 0xH0000, i32 0)
+  store half %2, half addrspace(1)* %0, align 2
+  %3 = tail call spir_func half @_Z21sub_group_shuffle_xorDhj(half 0xH0000, i32 0)
+  %4 = getelementptr inbounds half, half addrspace(1)* %0, i64 1
+  store half %3, half addrspace(1)* %4, align 2
+  ret void
+}
+
+declare dso_local spir_func half @_Z17sub_group_shuffleDhj(half, i32) local_unnamed_addr
+
+declare dso_local spir_func half @_Z21sub_group_shuffle_xorDhj(half, i32) local_unnamed_addr
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffle %[[#double]] %[[#ScopeSubgroup]] %[[#double_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleXor %[[#double]] %[[#ScopeSubgroup]] %[[#double_0]] %[[#int_0]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testShuffleDouble(double addrspace(1)* nocapture) local_unnamed_addr {
+  %2 = tail call spir_func double @_Z17sub_group_shuffledj(double 0.000000e+00, i32 0)
+  store double %2, double addrspace(1)* %0, align 8
+  %3 = tail call spir_func double @_Z21sub_group_shuffle_xordj(double 0.000000e+00, i32 0)
+  %4 = getelementptr inbounds double, double addrspace(1)* %0, i64 1
+  store double %3, double addrspace(1)* %4, align 8
+  ret void
+}
+
+declare dso_local spir_func double @_Z17sub_group_shuffledj(double, i32) local_unnamed_addr
+
+declare dso_local spir_func double @_Z21sub_group_shuffle_xordj(double, i32) local_unnamed_addr
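For reference when reading the mangled names in this and the following test, the Itanium type codes used by the declarations decode as:

;;   c = char      h = uchar     s = short     t = ushort
;;   i = int       j = uint      l = long      m = ulong
;;   f = float     Dh = half     d = double
;; e.g. @_Z17sub_group_shuffleDhj is sub_group_shuffle(half, uint).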
diff --git a/llvm/test/CodeGen/SPIRV/transcoding/sub_group_shuffle_relative.ll b/llvm/test/CodeGen/SPIRV/transcoding/sub_group_shuffle_relative.ll
new file mode
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/transcoding/sub_group_shuffle_relative.ll
@@ -0,0 +1,299 @@
+;; #pragma OPENCL EXTENSION cl_khr_subgroup_shuffle_relative : enable
+;; #pragma OPENCL EXTENSION cl_khr_fp16 : enable
+;; #pragma OPENCL EXTENSION cl_khr_fp64 : enable
+;;
+;; kernel void testShuffleRelativeChar(global char* dst)
+;; {
+;;     char v = 0;
+;;     dst[0] = sub_group_shuffle_up( v, 0 );
+;;     dst[1] = sub_group_shuffle_down( v, 0 );
+;; }
+;;
+;; kernel void testShuffleRelativeUChar(global uchar* dst)
+;; {
+;;     uchar v = 0;
+;;     dst[0] = sub_group_shuffle_up( v, 0 );
+;;     dst[1] = sub_group_shuffle_down( v, 0 );
+;; }
+;;
+;; kernel void testShuffleRelativeShort(global short* dst)
+;; {
+;;     short v = 0;
+;;     dst[0] = sub_group_shuffle_up( v, 0 );
+;;     dst[1] = sub_group_shuffle_down( v, 0 );
+;; }
+;;
+;; kernel void testShuffleRelativeUShort(global ushort* dst)
+;; {
+;;     ushort v = 0;
+;;     dst[0] = sub_group_shuffle_up( v, 0 );
+;;     dst[1] = sub_group_shuffle_down( v, 0 );
+;; }
+;;
+;; kernel void testShuffleRelativeInt(global int* dst)
+;; {
+;;     int v = 0;
+;;     dst[0] = sub_group_shuffle_up( v, 0 );
+;;     dst[1] = sub_group_shuffle_down( v, 0 );
+;; }
+;;
+;; kernel void testShuffleRelativeUInt(global uint* dst)
+;; {
+;;     uint v = 0;
+;;     dst[0] = sub_group_shuffle_up( v, 0 );
+;;     dst[1] = sub_group_shuffle_down( v, 0 );
+;; }
+;;
+;; kernel void testShuffleRelativeLong(global long* dst)
+;; {
+;;     long v = 0;
+;;     dst[0] = sub_group_shuffle_up( v, 0 );
+;;     dst[1] = sub_group_shuffle_down( v, 0 );
+;; }
+;;
+;; kernel void testShuffleRelativeULong(global ulong* dst)
+;; {
+;;     ulong v = 0;
+;;     dst[0] = sub_group_shuffle_up( v, 0 );
+;;     dst[1] = sub_group_shuffle_down( v, 0 );
+;; }
+;;
+;; kernel void testShuffleRelativeFloat(global float* dst)
+;; {
+;;     float v = 0;
+;;     dst[0] = sub_group_shuffle_up( v, 0 );
+;;     dst[1] = sub_group_shuffle_down( v, 0 );
+;; }
+;;
+;; kernel void testShuffleRelativeHalf(global half* dst)
+;; {
+;;     half v = 0;
+;;     dst[0] = sub_group_shuffle_up( v, 0 );
+;;     dst[1] = sub_group_shuffle_down( v, 0 );
+;; }
+;;
+;; kernel void testShuffleRelativeDouble(global double* dst)
+;; {
+;;     double v = 0;
+;;     dst[0] = sub_group_shuffle_up( v, 0 );
+;;     dst[1] = sub_group_shuffle_down( v, 0 );
+;; }
+
+; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
+
+; CHECK-SPIRV-DAG: OpCapability GroupNonUniformShuffleRelative
+
+; CHECK-SPIRV-DAG: %[[#char:]] = OpTypeInt 8 0
+; CHECK-SPIRV-DAG: %[[#short:]] = OpTypeInt 16 0
+; CHECK-SPIRV-DAG: %[[#int:]] = OpTypeInt 32 0
+; CHECK-SPIRV-DAG: %[[#long:]] = OpTypeInt 64 0
+; CHECK-SPIRV-DAG: %[[#half:]] = OpTypeFloat 16
+; CHECK-SPIRV-DAG: %[[#float:]] = OpTypeFloat 32
+; CHECK-SPIRV-DAG: %[[#double:]] = OpTypeFloat 64
+
+; CHECK-SPIRV-DAG: %[[#ScopeSubgroup:]] = OpConstant %[[#int]] 3
+; CHECK-SPIRV-DAG: %[[#char_0:]] = OpConstant %[[#char]] 0
+; CHECK-SPIRV-DAG: %[[#short_0:]] = OpConstant %[[#short]] 0
+; CHECK-SPIRV-DAG: %[[#int_0:]] = OpConstant %[[#int]] 0
+; CHECK-SPIRV-DAG: %[[#long_0:]] = OpConstantNull %[[#long]]
+; CHECK-SPIRV-DAG: %[[#half_0:]] = OpConstant %[[#half]] 0
+; CHECK-SPIRV-DAG: %[[#float_0:]] = OpConstant %[[#float]] 0
+; CHECK-SPIRV-DAG: %[[#double_0:]] = OpConstant %[[#double]] 0
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleUp %[[#char]] %[[#ScopeSubgroup]] %[[#char_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleDown %[[#char]] %[[#ScopeSubgroup]] %[[#char_0]] %[[#int_0]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testShuffleRelativeChar(i8 addrspace(1)* nocapture) local_unnamed_addr {
+  %2 = tail call spir_func signext i8 @_Z20sub_group_shuffle_upcj(i8 signext 0, i32 0)
+  store i8 %2, i8 addrspace(1)* %0, align 1
+  %3 = tail call spir_func signext i8 @_Z22sub_group_shuffle_downcj(i8 signext 0, i32 0)
+  %4 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 1
+  store i8 %3, i8 addrspace(1)* %4, align 1
+  ret void
+}
+
+declare dso_local spir_func signext i8 @_Z20sub_group_shuffle_upcj(i8 signext, i32) local_unnamed_addr
+
+declare dso_local spir_func signext i8 @_Z22sub_group_shuffle_downcj(i8 signext, i32) local_unnamed_addr
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleUp %[[#char]] %[[#ScopeSubgroup]] %[[#char_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleDown %[[#char]] %[[#ScopeSubgroup]] %[[#char_0]] %[[#int_0]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testShuffleRelativeUChar(i8 addrspace(1)* nocapture) local_unnamed_addr {
+  %2 = tail call spir_func zeroext i8 @_Z20sub_group_shuffle_uphj(i8 zeroext 0, i32 0)
+  store i8 %2, i8 addrspace(1)* %0, align 1
+  %3 = tail call spir_func zeroext i8 @_Z22sub_group_shuffle_downhj(i8 zeroext 0, i32 0)
+  %4 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 1
+  store i8 %3, i8 addrspace(1)* %4, align 1
+  ret void
+}
+
+declare dso_local spir_func zeroext i8 @_Z20sub_group_shuffle_uphj(i8 zeroext, i32) local_unnamed_addr
+
+declare dso_local spir_func zeroext i8 @_Z22sub_group_shuffle_downhj(i8 zeroext, i32) local_unnamed_addr
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleUp %[[#short]] %[[#ScopeSubgroup]] %[[#short_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleDown %[[#short]] %[[#ScopeSubgroup]] %[[#short_0]] %[[#int_0]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testShuffleRelativeShort(i16 addrspace(1)* nocapture) local_unnamed_addr {
+  %2 = tail call spir_func signext i16 @_Z20sub_group_shuffle_upsj(i16 signext 0, i32 0)
+  store i16 %2, i16 addrspace(1)* %0, align 2
+  %3 = tail call spir_func signext i16 @_Z22sub_group_shuffle_downsj(i16 signext 0, i32 0)
+  %4 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 1
+  store i16 %3, i16 addrspace(1)* %4, align 2
+  ret void
+}
+
+declare dso_local spir_func signext i16 @_Z20sub_group_shuffle_upsj(i16 signext, i32) local_unnamed_addr
+
+declare dso_local spir_func signext i16 @_Z22sub_group_shuffle_downsj(i16 signext, i32) local_unnamed_addr
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleUp %[[#short]] %[[#ScopeSubgroup]] %[[#short_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleDown %[[#short]] %[[#ScopeSubgroup]] %[[#short_0]] %[[#int_0]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testShuffleRelativeUShort(i16 addrspace(1)* nocapture) local_unnamed_addr {
+  %2 = tail call spir_func zeroext i16 @_Z20sub_group_shuffle_uptj(i16 zeroext 0, i32 0)
+  store i16 %2, i16 addrspace(1)* %0, align 2
+  %3 = tail call spir_func zeroext i16 @_Z22sub_group_shuffle_downtj(i16 zeroext 0, i32 0)
+  %4 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 1
+  store i16 %3, i16 addrspace(1)* %4, align 2
+  ret void
+}
+
+declare dso_local spir_func zeroext i16 @_Z20sub_group_shuffle_uptj(i16 zeroext, i32) local_unnamed_addr
+
+declare dso_local spir_func zeroext i16 @_Z22sub_group_shuffle_downtj(i16 zeroext, i32) local_unnamed_addr
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleUp %[[#int]] %[[#ScopeSubgroup]] %[[#int_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleDown %[[#int]] %[[#ScopeSubgroup]] %[[#int_0]] %[[#int_0]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testShuffleRelativeInt(i32 addrspace(1)* nocapture) local_unnamed_addr {
+  %2 = tail call spir_func i32 @_Z20sub_group_shuffle_upij(i32 0, i32 0)
+  store i32 %2, i32 addrspace(1)* %0, align 4
+  %3 = tail call spir_func i32 @_Z22sub_group_shuffle_downij(i32 0, i32 0)
+  %4 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 1
+  store i32 %3, i32 addrspace(1)* %4, align 4
+  ret void
+}
+
+declare dso_local spir_func i32 @_Z20sub_group_shuffle_upij(i32, i32) local_unnamed_addr
+
+declare dso_local spir_func i32 @_Z22sub_group_shuffle_downij(i32, i32) local_unnamed_addr
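All of these kernels pass a delta of 0, which keeps the FileCheck patterns uniform across types; under the cl_khr_subgroup_shuffle_relative semantics the interesting cases move data between neighbouring invocations, roughly as in this illustrative fragment:

;;   uint lid  = get_sub_group_local_id();
;;   int  up   = sub_group_shuffle_up(x, 1);    // value from invocation lid - 1
;;   int  down = sub_group_shuffle_down(x, 1);  // value from invocation lid + 1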
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleUp %[[#int]] %[[#ScopeSubgroup]] %[[#int_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleDown %[[#int]] %[[#ScopeSubgroup]] %[[#int_0]] %[[#int_0]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testShuffleRelativeUInt(i32 addrspace(1)* nocapture) local_unnamed_addr {
+  %2 = tail call spir_func i32 @_Z20sub_group_shuffle_upjj(i32 0, i32 0)
+  store i32 %2, i32 addrspace(1)* %0, align 4
+  %3 = tail call spir_func i32 @_Z22sub_group_shuffle_downjj(i32 0, i32 0)
+  %4 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 1
+  store i32 %3, i32 addrspace(1)* %4, align 4
+  ret void
+}
+
+declare dso_local spir_func i32 @_Z20sub_group_shuffle_upjj(i32, i32) local_unnamed_addr
+
+declare dso_local spir_func i32 @_Z22sub_group_shuffle_downjj(i32, i32) local_unnamed_addr
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleUp %[[#long]] %[[#ScopeSubgroup]] %[[#long_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleDown %[[#long]] %[[#ScopeSubgroup]] %[[#long_0]] %[[#int_0]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testShuffleRelativeLong(i64 addrspace(1)* nocapture) local_unnamed_addr {
+  %2 = tail call spir_func i64 @_Z20sub_group_shuffle_uplj(i64 0, i32 0)
+  store i64 %2, i64 addrspace(1)* %0, align 8
+  %3 = tail call spir_func i64 @_Z22sub_group_shuffle_downlj(i64 0, i32 0)
+  %4 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 1
+  store i64 %3, i64 addrspace(1)* %4, align 8
+  ret void
+}
+
+declare dso_local spir_func i64 @_Z20sub_group_shuffle_uplj(i64, i32) local_unnamed_addr
+
+declare dso_local spir_func i64 @_Z22sub_group_shuffle_downlj(i64, i32) local_unnamed_addr
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleUp %[[#long]] %[[#ScopeSubgroup]] %[[#long_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleDown %[[#long]] %[[#ScopeSubgroup]] %[[#long_0]] %[[#int_0]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testShuffleRelativeULong(i64 addrspace(1)* nocapture) local_unnamed_addr {
+  %2 = tail call spir_func i64 @_Z20sub_group_shuffle_upmj(i64 0, i32 0)
+  store i64 %2, i64 addrspace(1)* %0, align 8
+  %3 = tail call spir_func i64 @_Z22sub_group_shuffle_downmj(i64 0, i32 0)
+  %4 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 1
+  store i64 %3, i64 addrspace(1)* %4, align 8
+  ret void
+}
+
+declare dso_local spir_func i64 @_Z20sub_group_shuffle_upmj(i64, i32) local_unnamed_addr
+
+declare dso_local spir_func i64 @_Z22sub_group_shuffle_downmj(i64, i32) local_unnamed_addr
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleUp %[[#float]] %[[#ScopeSubgroup]] %[[#float_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleDown %[[#float]] %[[#ScopeSubgroup]] %[[#float_0]] %[[#int_0]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testShuffleRelativeFloat(float addrspace(1)* nocapture) local_unnamed_addr {
+  %2 = tail call spir_func float @_Z20sub_group_shuffle_upfj(float 0.000000e+00, i32 0)
+  store float %2, float addrspace(1)* %0, align 4
+  %3 = tail call spir_func float @_Z22sub_group_shuffle_downfj(float 0.000000e+00, i32 0)
+  %4 = getelementptr inbounds float, float addrspace(1)* %0, i64 1
+  store float %3, float addrspace(1)* %4, align 4
+  ret void
+}
+
+declare dso_local spir_func float @_Z20sub_group_shuffle_upfj(float, i32) local_unnamed_addr
+
+declare dso_local spir_func float @_Z22sub_group_shuffle_downfj(float, i32) local_unnamed_addr
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleUp %[[#half]] %[[#ScopeSubgroup]] %[[#half_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleDown %[[#half]] %[[#ScopeSubgroup]] %[[#half_0]] %[[#int_0]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testShuffleRelativeHalf(half addrspace(1)* nocapture) local_unnamed_addr {
+  %2 = tail call spir_func half @_Z20sub_group_shuffle_upDhj(half 0xH0000, i32 0)
+  store half %2, half addrspace(1)* %0, align 2
+  %3 = tail call spir_func half @_Z22sub_group_shuffle_downDhj(half 0xH0000, i32 0)
+  %4 = getelementptr inbounds half, half addrspace(1)* %0, i64 1
+  store half %3, half addrspace(1)* %4, align 2
+  ret void
+}
+
+declare dso_local spir_func half @_Z20sub_group_shuffle_upDhj(half, i32) local_unnamed_addr
+
+declare dso_local spir_func half @_Z22sub_group_shuffle_downDhj(half, i32) local_unnamed_addr
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleUp %[[#double]] %[[#ScopeSubgroup]] %[[#double_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleDown %[[#double]] %[[#ScopeSubgroup]] %[[#double_0]] %[[#int_0]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define dso_local spir_kernel void @testShuffleRelativeDouble(double addrspace(1)* nocapture) local_unnamed_addr {
+  %2 = tail call spir_func double @_Z20sub_group_shuffle_updj(double 0.000000e+00, i32 0)
+  store double %2, double addrspace(1)* %0, align 8
+  %3 = tail call spir_func double @_Z22sub_group_shuffle_downdj(double 0.000000e+00, i32 0)
+  %4 = getelementptr inbounds double, double addrspace(1)* %0, i64 1
+  store double %3, double addrspace(1)* %4, align 8
+  ret void
+}
+
+declare dso_local spir_func double @_Z20sub_group_shuffle_updj(double, i32) local_unnamed_addr
+
+declare dso_local spir_func double @_Z22sub_group_shuffle_downdj(double, i32) local_unnamed_addr
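Taken together, the two shuffle tests pin down how the builtins split across SPIR-V capabilities:

;;   sub_group_shuffle / sub_group_shuffle_xor     -> OpCapability GroupNonUniformShuffle
;;   sub_group_shuffle_up / sub_group_shuffle_down -> OpCapability GroupNonUniformShuffleRelative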