diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -1136,6 +1136,16 @@
 
     break;
   }
+  case Intrinsic::amdgcn_wave_reduce_umin:
+  case Intrinsic::amdgcn_wave_reduce_umax: {
+    Value *Src = II.getArgOperand(0);
+
+    // The min/max over lanes of a constant (or poison) source is the source.
+    if (isa<PoisonValue>(Src) || isa<Constant>(Src))
+      return IC.replaceInstUsesWith(II, Src);
+
+    break;
+  }
   }
   if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
           AMDGPU::getImageDimIntrinsicInfo(II.getIntrinsicID())) {
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -6194,3 +6194,61 @@
   %val = call half @llvm.amdgcn.exp2.f16(half 0xH83ff)
   ret half %val
 }
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.wave.reduce.umin.i32
+; --------------------------------------------------------------------
+
+declare i32 @llvm.amdgcn.wave.reduce.umin.i32(i32, i32 immarg)
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_umin_poison(ptr addrspace(1) %out, i32 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_umin_poison(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i32 @llvm.amdgcn.wave.reduce.umin.i32(i32 poison, i32 1)
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_umin_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_umin_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i32 @llvm.amdgcn.wave.reduce.umin.i32(i32 123, i32 1)
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.wave.reduce.umax.i32
+; --------------------------------------------------------------------
+
+declare i32 @llvm.amdgcn.wave.reduce.umax.i32(i32, i32 immarg)
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_poison(ptr addrspace(1) %out, i32 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_poison(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i32 @llvm.amdgcn.wave.reduce.umax.i32(i32 poison, i32 1)
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i32 @llvm.amdgcn.wave.reduce.umax.i32(i32 123, i32 1)
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}