Index: lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -76,9 +76,10 @@ case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break; case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break; case ISD::SMIN: - case ISD::SMAX: + case ISD::SMAX: Res = PromoteIntRes_SExtOrZExtIntBinOp(N, true); break; case ISD::UMIN: - case ISD::UMAX: Res = PromoteIntRes_SimpleIntBinOp(N); break; + case ISD::UMAX: Res = PromoteIntRes_SExtOrZExtIntBinOp(N, false); break; + case ISD::SHL: Res = PromoteIntRes_SHL(N); break; case ISD::SIGN_EXTEND_INREG: Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break; @@ -660,6 +661,22 @@ LHS.getValueType(), LHS, RHS); } +/* Promote the integer result of the two-operand node N. Both operands are promoted with SExtPromotedInteger when Signed is true and with ZExtPromotedInteger otherwise; a node with N's opcode is then rebuilt on the promoted operands, using the promoted LHS value type as the result type. The promotion switch passes Signed = true for ISD::SMIN and ISD::SMAX and Signed = false for ISD::UMIN and ISD::UMAX. NOTE(review): presumably the extension kind has to match the signedness of the min/max because the rebuilt node operates on the full promoted width - confirm against the SExtPromotedInteger and ZExtPromotedInteger helpers. */ SDValue DAGTypeLegalizer::PromoteIntRes_SExtOrZExtIntBinOp(SDNode *N, + bool Signed) { + SDValue LHS, RHS; + + if (Signed) { + LHS = SExtPromotedInteger(N->getOperand(0)); + RHS = SExtPromotedInteger(N->getOperand(1)); + } else { + LHS = ZExtPromotedInteger(N->getOperand(0)); + RHS = ZExtPromotedInteger(N->getOperand(1)); + } + + return DAG.getNode(N->getOpcode(), SDLoc(N), + LHS.getValueType(), LHS, RHS); +} + SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); Index: lib/CodeGen/SelectionDAG/LegalizeTypes.h =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -276,6 +276,7 @@ SDValue PromoteIntRes_SETCC(SDNode *N); SDValue PromoteIntRes_SHL(SDNode *N); SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N); + SDValue PromoteIntRes_SExtOrZExtIntBinOp(SDNode *N, bool Signed); SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N); SDValue PromoteIntRes_SRA(SDNode *N); SDValue PromoteIntRes_SRL(SDNode *N); Index: test/CodeGen/AMDGPU/max.ll 
=================================================================== --- test/CodeGen/AMDGPU/max.ll +++ test/CodeGen/AMDGPU/max.ll @@ -53,6 +53,23 @@ ret void } +; FUNC-LABEL: {{^}}v_test_imax_sge_i8: +; SI: buffer_load_sbyte +; SI: buffer_load_sbyte +; SI: v_max_i32_e32 +define void @v_test_imax_sge_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep0 = getelementptr i8, i8 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i8, i8 addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr i8, i8 addrspace(1)* %out, i32 %tid + %a = load i8, i8 addrspace(1)* %gep0, align 1 + %b = load i8, i8 addrspace(1)* %gep1, align 1 + %cmp = icmp sge i8 %a, %b + %val = select i1 %cmp, i8 %a, i8 %b + store i8 %val, i8 addrspace(1)* %outgep, align 1 + ret void +} + ; FUNC-LABEL: {{^}}s_test_imax_sgt_imm_i32: ; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9 define void @s_test_imax_sgt_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind { @@ -132,6 +149,23 @@ ret void } +; FUNC-LABEL: {{^}}v_test_umax_uge_i8: +; SI: buffer_load_ubyte +; SI: buffer_load_ubyte +; SI: v_max_u32_e32 +define void @v_test_umax_uge_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep0 = getelementptr i8, i8 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i8, i8 addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr i8, i8 addrspace(1)* %out, i32 %tid + %a = load i8, i8 addrspace(1)* %gep0, align 1 + %b = load i8, i8 addrspace(1)* %gep1, align 1 + %cmp = icmp uge i8 %a, %b + %val = select i1 %cmp, i8 %a, i8 %b + store i8 %val, i8 addrspace(1)* %outgep, align 1 + ret void +} + ; FUNC-LABEL: @v_test_umax_ugt_i32 ; SI: v_max_u32_e32 define void @v_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { @@ -203,6 +237,10 @@ } ; FUNC-LABEL: {{^}}s_test_imax_sge_i16: 
+; SI: s_load_dword +; SI: s_load_dword +; SI: s_sext_i32_i16 +; SI: s_sext_i32_i16 ; SI: s_max_i32 define void @s_test_imax_sge_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind { %cmp = icmp sge i16 %a, %b Index: test/CodeGen/AMDGPU/min.ll =================================================================== --- test/CodeGen/AMDGPU/min.ll +++ test/CodeGen/AMDGPU/min.ll @@ -48,6 +48,10 @@ } ; FUNC-LABEL: {{^}}s_test_imin_sle_i8: +; SI: s_load_dword +; SI: s_load_dword +; SI: s_sext_i32_i8 +; SI: s_sext_i32_i8 ; SI: s_min_i32 define void @s_test_imin_sle_i8(i8 addrspace(1)* %out, i8 %a, i8 %b) nounwind { %cmp = icmp sle i8 %a, %b @@ -60,10 +64,21 @@ ; extloads with mubuf instructions. ; FUNC-LABEL: {{^}}s_test_imin_sle_v4i8: +; SI: buffer_load_sbyte +; SI: buffer_load_sbyte +; SI: buffer_load_sbyte +; SI: buffer_load_sbyte +; SI: buffer_load_sbyte +; SI: buffer_load_sbyte +; SI: buffer_load_sbyte +; SI: buffer_load_sbyte + ; SI: v_min_i32 ; SI: v_min_i32 ; SI: v_min_i32 ; SI: v_min_i32 + +; SI: s_endpgm define void @s_test_imin_sle_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, <4 x i8> %b) nounwind { %cmp = icmp sle <4 x i8> %a, %b %val = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %b @@ -192,6 +207,23 @@ ret void } +; FUNC-LABEL: {{^}}v_test_umin_ult_i8: +; SI: buffer_load_ubyte +; SI: buffer_load_ubyte +; SI: v_min_u32_e32 +define void @v_test_umin_ult_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep0 = getelementptr i8, i8 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i8, i8 addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr i8, i8 addrspace(1)* %out, i32 %tid + %a = load i8, i8 addrspace(1)* %gep0, align 1 + %b = load i8, i8 addrspace(1)* %gep1, align 1 + %cmp = icmp ult i8 %a, %b + %val = select i1 %cmp, i8 %a, i8 %b + store i8 %val, i8 addrspace(1)* %outgep, align 1 + ret void +} + ; FUNC-LABEL: @s_test_umin_ult_i32 ; SI: s_min_u32 define 
void @s_test_umin_ult_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {