Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3085,6 +3085,25 @@
 
     break;
   }
+  case AMDGPUISD::SMIN3:
+  case AMDGPUISD::SMAX3:
+  case AMDGPUISD::SMED3:
+  case AMDGPUISD::UMIN3:
+  case AMDGPUISD::UMAX3:
+  case AMDGPUISD::UMED3: {
+    // Report a bit as known only if it is known, with the same value, in all
+    // three operands.
+    APInt Op0Zero, Op0One;
+    APInt Op1Zero, Op1One;
+    APInt Op2Zero, Op2One;
+    DAG.computeKnownBits(Op.getOperand(0), Op0Zero, Op0One, Depth + 1);
+    DAG.computeKnownBits(Op.getOperand(1), Op1Zero, Op1One, Depth + 1);
+    DAG.computeKnownBits(Op.getOperand(2), Op2Zero, Op2One, Depth + 1);
+
+    KnownZero = Op0Zero & Op1Zero & Op2Zero;
+    KnownOne = Op0One & Op1One & Op2One;
+    break;
+  }
   }
 }
 
@@ -3116,6 +3135,29 @@
   case AMDGPUISD::BORROW:
     return 31;
 
+  case AMDGPUISD::SMIN3:
+  case AMDGPUISD::SMAX3:
+  case AMDGPUISD::SMED3:
+  case AMDGPUISD::UMIN3:
+  case AMDGPUISD::UMAX3:
+  case AMDGPUISD::UMED3: {
+    // Return the smallest sign-bit count among the three operands, stopping
+    // as soon as any operand reports only 1.
+    unsigned Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+    if (Tmp0 == 1)
+      return 1; // Early out.
+
+    unsigned Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), Depth + 1);
+    if (Tmp1 == 1)
+      return 1; // Early out.
+
+    unsigned Tmp2 = DAG.ComputeNumSignBits(Op.getOperand(2), Depth + 1);
+    if (Tmp2 == 1)
+      return 1; // Early out.
+
+    return std::min(Tmp0, std::min(Tmp1, Tmp2));
+  }
+
   default:
     return 1;
   }
Index: test/CodeGen/AMDGPU/llvm.amdgcn.smed3.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.smed3.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.smed3.ll
@@ -11,5 +11,27 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}test_smed3_num_sign_bits:
+; GCN: v_med3_i32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; GCN-NEXT: buffer_store_dword [[RESULT]]
+define void @test_smed3_num_sign_bits(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 zeroext %src2) #1 {
+
+  %shl0 = shl i32 %src0, 16
+  %src0.new = ashr i32 %shl0, 16
+
+  %shl1 = shl i32 %src1, 17
+  %src1.new = ashr i32 %shl1, 17
+
+  %shl2 = shl i32 %src2, 18
+  %src2.new = ashr i32 %shl2, 18
+
+  %med3 = call i32 @llvm.amdgcn.smed3(i32 %src0.new, i32 %src1.new, i32 %src2.new)
+
+  %shl = shl i32 %med3, 16
+  %sra = ashr i32 %shl, 16
+  store i32 %sra, i32 addrspace(1)* %out
+  ret void
+}
+
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
Index: test/CodeGen/AMDGPU/llvm.amdgcn.umed3.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.umed3.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.umed3.ll
@@ -11,5 +11,91 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}test_umed3_known_bits_zext:
+; GCN: v_med3_u32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; GCN-NEXT: buffer_store_dword [[RESULT]]
+define void @test_umed3_known_bits_zext(i32 addrspace(1)* %out, i16 zeroext %src0, i16 zeroext %src1, i16 zeroext %src2) #1 {
+  %src0.ext = zext i16 %src0 to i32
+  %src1.ext = zext i16 %src1 to i32
+  %src2.ext = zext i16 %src2 to i32
+  %med3 = call i32 @llvm.amdgcn.umed3(i32 %src0.ext, i32 %src1.ext, i32 %src2.ext)
+  %trunc = and i32 %med3, 65535
+  store i32 %trunc, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_umed3_known_sign_bits_sext_in_reg:
+; GCN: v_med3_u32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; GCN-NEXT: buffer_store_dword [[RESULT]]
+define void @test_umed3_known_sign_bits_sext_in_reg(i32 addrspace(1)* %out, i16 signext %src0, i16 signext %src1, i16 signext %src2) #1 {
+  %src0.ext = sext i16 %src0 to i32
+  %src1.ext = sext i16 %src1 to i32
+  %src2.ext = sext i16 %src2 to i32
+  %med3 = call i32 @llvm.amdgcn.umed3(i32 %src0.ext, i32 %src1.ext, i32 %src2.ext)
+  %shl = shl i32 %med3, 16
+  %sext.in.reg = ashr i32 %shl, 16
+  store i32 %sext.in.reg, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_umed3_known_2_bits:
+; GCN: v_med3_u32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; GCN-NEXT: buffer_store_dword [[RESULT]]
+define void @test_umed3_known_2_bits(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 zeroext %src2) #1 {
+
+  %shl0 = shl i32 %src0, 30
+  %src0.new = lshr i32 %shl0, 30
+
+  %shl1 = shl i32 %src1, 30
+  %src1.new = lshr i32 %shl1, 30
+
+  %shl2 = shl i32 %src2, 30
+  %src2.new = lshr i32 %shl2, 30
+
+  %med3 = call i32 @llvm.amdgcn.umed3(i32 %src0.new, i32 %src1.new, i32 %src2.new)
+  %trunc = and i32 %med3, 3
+  store i32 %trunc, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_umed3_known_bits_zext_unknown_src0:
+; GCN: v_med3_u32 [[MED3:v[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; GCN: v_and_b32_e32 [[RESULT:v[0-9]+]], 0xffff, [[MED3]]
+; GCN: buffer_store_dword [[RESULT]]
+define void @test_umed3_known_bits_zext_unknown_src0(i32 addrspace(1)* %out, i32 %src0, i16 zeroext %src1, i16 zeroext %src2) #1 {
+  %src1.ext = zext i16 %src1 to i32
+  %src2.ext = zext i16 %src2 to i32
+  %med3 = call i32 @llvm.amdgcn.umed3(i32 %src0, i32 %src1.ext, i32 %src2.ext)
+  %trunc = and i32 %med3, 65535
+  store i32 %trunc, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_umed3_known_bits_zext_unknown_src1:
+; GCN: v_med3_u32 [[MED3:v[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; GCN: v_and_b32_e32 [[RESULT:v[0-9]+]], 0xffff, [[MED3]]
+; GCN: buffer_store_dword [[RESULT]]
+define void @test_umed3_known_bits_zext_unknown_src1(i32 addrspace(1)* %out, i16 zeroext %src0, i32 %src1, i16 zeroext %src2) #1 {
+  %src0.ext = zext i16 %src0 to i32
+  %src2.ext = zext i16 %src2 to i32
+  %med3 = call i32 @llvm.amdgcn.umed3(i32 %src0.ext, i32 %src1, i32 %src2.ext)
+  %trunc = and i32 %med3, 65535
+  store i32 %trunc, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_umed3_known_bits_zext_unknown_src2:
+; GCN: v_med3_u32 [[MED3:v[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; GCN: v_and_b32_e32 [[RESULT:v[0-9]+]], 0xffff, [[MED3]]
+; GCN: buffer_store_dword [[RESULT]]
+define void @test_umed3_known_bits_zext_unknown_src2(i32 addrspace(1)* %out, i16 zeroext %src0, i16 zeroext %src1, i32 %src2) #1 {
+  %src0.ext = zext i16 %src0 to i32
+  %src1.ext = zext i16 %src1 to i32
+  %med3 = call i32 @llvm.amdgcn.umed3(i32 %src0.ext, i32 %src1.ext, i32 %src2)
+  %trunc = and i32 %med3, 65535
+  store i32 %trunc, i32 addrspace(1)* %out
+  ret void
+}
+
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }