diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -60,6 +60,7 @@ #include #include +#include using namespace llvm; namespace { @@ -1548,6 +1549,8 @@ case Intrinsic::vector_reduce_umax: // Target intrinsics case Intrinsic::amdgcn_perm: + case Intrinsic::amdgcn_wave_reduce_umin: + case Intrinsic::amdgcn_wave_reduce_umax: case Intrinsic::arm_mve_vctp8: case Intrinsic::arm_mve_vctp16: case Intrinsic::arm_mve_vctp32: @@ -2839,6 +2842,9 @@ return Constant::getNullValue(Ty); return ConstantInt::get(Ty, C0->abs()); + case Intrinsic::amdgcn_wave_reduce_umin: + case Intrinsic::amdgcn_wave_reduce_umax: + return dyn_cast(Operands[0]); } return nullptr; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll @@ -242,72 +242,34 @@ define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) { ; GFX8DAGISEL-LABEL: poison_value: ; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v0 ; GFX8DAGISEL-NEXT: s_endpgm ; ; GFX8GISEL-LABEL: poison_value: ; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dword v[0:1], v0 ; GFX8GISEL-NEXT: s_endpgm ; ; GFX9DAGISEL-LABEL: poison_value: ; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: 
global_store_dword v0, v0, s[0:1] ; GFX9DAGISEL-NEXT: s_endpgm ; ; GFX9GISEL-LABEL: poison_value: ; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dword v0, v0, s[0:1] ; GFX9GISEL-NEXT: s_endpgm ; ; GFX10DAGISEL-LABEL: poison_value: ; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dword v0, v0, s[0:1] ; GFX10DAGISEL-NEXT: s_endpgm ; ; GFX10GISEL-LABEL: poison_value: ; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dword v0, v0, s[0:1] ; GFX10GISEL-NEXT: s_endpgm ; ; GFX11DAGISEL-LABEL: poison_value: ; GFX11DAGISEL: ; %bb.0: ; %entry -; GFX11DAGISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 -; GFX11DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11DAGISEL-NEXT: global_store_b32 v0, v0, s[0:1] -; GFX11DAGISEL-NEXT: s_nop 0 -; GFX11DAGISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11DAGISEL-NEXT: s_endpgm ; ; GFX11GISEL-LABEL: poison_value: ; GFX11GISEL: ; %bb.0: ; %entry -; GFX11GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 -; GFX11GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11GISEL-NEXT: global_store_b32 v0, v0, s[0:1] -; GFX11GISEL-NEXT: s_nop 0 -; GFX11GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11GISEL-NEXT: s_endpgm entry: %result = call i32 @llvm.amdgcn.wave.reduce.umax.i32(i32 poison, i32 1) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll @@ 
-240,75 +240,37 @@ ret void } -define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) { +define amdgpu_kernel void @poison_value(ptr addrspace(1) %out) { ; GFX8DAGISEL-LABEL: poison_value: ; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v0 ; GFX8DAGISEL-NEXT: s_endpgm ; ; GFX8GISEL-LABEL: poison_value: ; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dword v[0:1], v0 ; GFX8GISEL-NEXT: s_endpgm ; ; GFX9DAGISEL-LABEL: poison_value: ; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v0, v0, s[0:1] ; GFX9DAGISEL-NEXT: s_endpgm ; ; GFX9GISEL-LABEL: poison_value: ; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dword v0, v0, s[0:1] ; GFX9GISEL-NEXT: s_endpgm ; ; GFX10DAGISEL-LABEL: poison_value: ; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dword v0, v0, s[0:1] ; GFX10DAGISEL-NEXT: s_endpgm ; ; GFX10GISEL-LABEL: poison_value: ; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dword v0, v0, s[0:1] ; GFX10GISEL-NEXT: s_endpgm ; ; 
GFX11DAGISEL-LABEL: poison_value: ; GFX11DAGISEL: ; %bb.0: ; %entry -; GFX11DAGISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 -; GFX11DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11DAGISEL-NEXT: global_store_b32 v0, v0, s[0:1] -; GFX11DAGISEL-NEXT: s_nop 0 -; GFX11DAGISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11DAGISEL-NEXT: s_endpgm ; ; GFX11GISEL-LABEL: poison_value: ; GFX11GISEL: ; %bb.0: ; %entry -; GFX11GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 -; GFX11GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11GISEL-NEXT: global_store_b32 v0, v0, s[0:1] -; GFX11GISEL-NEXT: s_nop 0 -; GFX11GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11GISEL-NEXT: s_endpgm entry: %result = call i32 @llvm.amdgcn.wave.reduce.umin.i32(i32 poison, i32 1) @@ -316,7 +278,7 @@ ret void } -define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) { +define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX8DAGISEL-LABEL: divergent_value: ; GFX8DAGISEL: ; %bb.0: ; %entry ; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/wave.reduce.ll b/llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/wave.reduce.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/wave.reduce.ll @@ -0,0 +1,76 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=instsimplify -S | FileCheck %s + +; -------------------------------------------------------------------- +; llvm.amdgcn.wave.reduce.umin.i32 +; -------------------------------------------------------------------- + +declare i32 @llvm.amdgcn.wave.reduce.umin.i32(i32, i32 immarg) + +define amdgpu_kernel void @test_constant_fold_wave_reduce_umin_poison(ptr addrspace(1) %out, i32 %in) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_umin_poison( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 
poison, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i32 @llvm.amdgcn.wave.reduce.umin.i32(i32 poison, i32 1) + store i32 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_umin_const(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_umin_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i32 @llvm.amdgcn.wave.reduce.umin.i32(i32 123, i32 1) + store i32 %result, ptr addrspace(1) %out + ret void +} + +; -------------------------------------------------------------------- +; llvm.amdgcn.wave.reduce.umax.i32 +; -------------------------------------------------------------------- + +declare i32 @llvm.amdgcn.wave.reduce.umax.i32(i32, i32 immarg) + +define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_poison(ptr addrspace(1) %out, i32 %in) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_poison( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i32 @llvm.amdgcn.wave.reduce.umax.i32(i32 poison, i32 1) + store i32 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_const(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i32 @llvm.amdgcn.wave.reduce.umax.i32(i32 123, i32 1) + store i32 %result, ptr addrspace(1) %out + ret void +} + +@gv = constant i32 0 +define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_gv(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_gv( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RESULT:%.*]] = call i32 @llvm.amdgcn.wave.reduce.umax.i32(i32 ptrtoint (ptr @gv 
to i32), i32 1) +; CHECK-NEXT: store i32 [[RESULT]], ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i32 @llvm.amdgcn.wave.reduce.umax.i32(i32 ptrtoint (ptr @gv to i32), i32 1) + store i32 %result, ptr addrspace(1) %out + ret void +}