Index: llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -290,15 +290,19 @@ // Custom handling for i8 intrinsics setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom); - setOperationAction(ISD::CTLZ, MVT::i16, Legal); - setOperationAction(ISD::CTLZ, MVT::i32, Legal); - setOperationAction(ISD::CTLZ, MVT::i64, Legal); + for (const auto& Ty : {MVT::i16, MVT::i32, MVT::i64}) { + setOperationAction(ISD::SMIN, Ty, Legal); + setOperationAction(ISD::SMAX, Ty, Legal); + setOperationAction(ISD::UMIN, Ty, Legal); + setOperationAction(ISD::UMAX, Ty, Legal); + + setOperationAction(ISD::CTPOP, Ty, Legal); + setOperationAction(ISD::CTLZ, Ty, Legal); + } + setOperationAction(ISD::CTTZ, MVT::i16, Expand); setOperationAction(ISD::CTTZ, MVT::i32, Expand); setOperationAction(ISD::CTTZ, MVT::i64, Expand); - setOperationAction(ISD::CTPOP, MVT::i16, Legal); - setOperationAction(ISD::CTPOP, MVT::i32, Legal); - setOperationAction(ISD::CTPOP, MVT::i64, Legal); // PTX does not directly support SELP of i1, so promote to i32 first setOperationAction(ISD::SELECT, MVT::i1, Custom); @@ -313,7 +317,6 @@ setTargetDAGCombine(ISD::FADD); setTargetDAGCombine(ISD::MUL); setTargetDAGCombine(ISD::SHL); - setTargetDAGCombine(ISD::SELECT); setTargetDAGCombine(ISD::SREM); setTargetDAGCombine(ISD::UREM); @@ -4159,67 +4162,6 @@ return SDValue(); } -static SDValue PerformSELECTCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { - // Currently this detects patterns for integer min and max and - // lowers them to PTX-specific intrinsics that enable hardware - // support. 
- - const SDValue Cond = N->getOperand(0); - if (Cond.getOpcode() != ISD::SETCC) return SDValue(); - - const SDValue LHS = Cond.getOperand(0); - const SDValue RHS = Cond.getOperand(1); - const SDValue True = N->getOperand(1); - const SDValue False = N->getOperand(2); - if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) - return SDValue(); - - const EVT VT = N->getValueType(0); - if (VT != MVT::i32 && VT != MVT::i64) return SDValue(); - - const ISD::CondCode CC = cast(Cond.getOperand(2))->get(); - SDValue Larger; // The larger of LHS and RHS when condition is true. - switch (CC) { - case ISD::SETULT: - case ISD::SETULE: - case ISD::SETLT: - case ISD::SETLE: - Larger = RHS; - break; - - case ISD::SETGT: - case ISD::SETGE: - case ISD::SETUGT: - case ISD::SETUGE: - Larger = LHS; - break; - - default: - return SDValue(); - } - const bool IsMax = (Larger == True); - const bool IsSigned = ISD::isSignedIntSetCC(CC); - - unsigned IntrinsicId; - if (VT == MVT::i32) { - if (IsSigned) - IntrinsicId = IsMax ? Intrinsic::nvvm_max_i : Intrinsic::nvvm_min_i; - else - IntrinsicId = IsMax ? Intrinsic::nvvm_max_ui : Intrinsic::nvvm_min_ui; - } else { - assert(VT == MVT::i64); - if (IsSigned) - IntrinsicId = IsMax ? Intrinsic::nvvm_max_ll : Intrinsic::nvvm_min_ll; - else - IntrinsicId = IsMax ? 
Intrinsic::nvvm_max_ull : Intrinsic::nvvm_min_ull; - } - - SDLoc DL(N); - return DCI.DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, - DCI.DAG.getConstant(IntrinsicId, DL, VT), LHS, RHS); -} - static SDValue PerformREMCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, CodeGenOpt::Level OptLevel) { @@ -4429,8 +4371,6 @@ return PerformSHLCombine(N, DCI, OptLevel); case ISD::AND: return PerformANDCombine(N, DCI); - case ISD::SELECT: - return PerformSELECTCombine(N, DCI); case ISD::UREM: case ISD::SREM: return PerformREMCombine(N, DCI, OptLevel); Index: llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td =================================================================== --- llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td +++ llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -529,6 +529,12 @@ defm ABS_32 : ABS; defm ABS_64 : ABS; +// Integer min/max. +defm SMAX : I3<"max.s", smax>; +defm UMAX : I3<"max.u", umax>; +defm SMIN : I3<"min.s", smin>; +defm UMIN : I3<"min.u", umin>; + // // Wide multiplication // Index: llvm/trunk/test/CodeGen/NVPTX/combine-min-max.ll =================================================================== --- llvm/trunk/test/CodeGen/NVPTX/combine-min-max.ll +++ llvm/trunk/test/CodeGen/NVPTX/combine-min-max.ll @@ -21,20 +21,140 @@ ret i64 %sel } -; PTX does have e.g. max.s16, but at least as of Kepler (sm_3x) that -; gets compiled to SASS that converts the 16 bit parameters to 32 bit -; before using a 32 bit instruction. That is probably not a win and -; NVCC 7.5 does not emit 16 bit min/max either, presumably for that -; reason. 
+; ************************************* +; * All variations with i16 + +; *** ab, unsigned, i16 define i16 @ab_ugt_i16(i16 %a, i16 %b) { ; LABEL: @ab_ugt_i16 -; CHECK-NOT: min -; CHECK-NOT: max +; CHECK: max.u16 %cmp = icmp ugt i16 %a, %b %sel = select i1 %cmp, i16 %a, i16 %b ret i16 %sel } +define i16 @ab_uge_i16(i16 %a, i16 %b) { +; LABEL: @ab_uge_i16 +; CHECK: max.u16 + %cmp = icmp uge i16 %a, %b + %sel = select i1 %cmp, i16 %a, i16 %b + ret i16 %sel +} + +define i16 @ab_ult_i16(i16 %a, i16 %b) { +; LABEL: @ab_ult_i16 +; CHECK: min.u16 + %cmp = icmp ult i16 %a, %b + %sel = select i1 %cmp, i16 %a, i16 %b + ret i16 %sel +} + +define i16 @ab_ule_i16(i16 %a, i16 %b) { +; LABEL: @ab_ule_i16 +; CHECK: min.u16 + %cmp = icmp ule i16 %a, %b + %sel = select i1 %cmp, i16 %a, i16 %b + ret i16 %sel +} + +; *** ab, signed, i16 +define i16 @ab_sgt_i16(i16 %a, i16 %b) { +; LABEL: @ab_sgt_i16 +; CHECK: max.s16 + %cmp = icmp sgt i16 %a, %b + %sel = select i1 %cmp, i16 %a, i16 %b + ret i16 %sel +} + +define i16 @ab_sge_i16(i16 %a, i16 %b) { +; LABEL: @ab_sge_i16 +; CHECK: max.s16 + %cmp = icmp sge i16 %a, %b + %sel = select i1 %cmp, i16 %a, i16 %b + ret i16 %sel +} + +define i16 @ab_slt_i16(i16 %a, i16 %b) { +; LABEL: @ab_slt_i16 +; CHECK: min.s16 + %cmp = icmp slt i16 %a, %b + %sel = select i1 %cmp, i16 %a, i16 %b + ret i16 %sel +} + +define i16 @ab_sle_i16(i16 %a, i16 %b) { +; LABEL: @ab_sle_i16 +; CHECK: min.s16 + %cmp = icmp sle i16 %a, %b + %sel = select i1 %cmp, i16 %a, i16 %b + ret i16 %sel +} + +; *** ba, unsigned, i16 +define i16 @ba_ugt_i16(i16 %a, i16 %b) { +; LABEL: @ba_ugt_i16 +; CHECK: min.u16 + %cmp = icmp ugt i16 %a, %b + %sel = select i1 %cmp, i16 %b, i16 %a + ret i16 %sel +} + +define i16 @ba_uge_i16(i16 %a, i16 %b) { +; LABEL: @ba_uge_i16 +; CHECK: min.u16 + %cmp = icmp uge i16 %a, %b + %sel = select i1 %cmp, i16 %b, i16 %a + ret i16 %sel +} + +define i16 @ba_ult_i16(i16 %a, i16 %b) { +; LABEL: @ba_ult_i16 +; CHECK: max.u16 + %cmp = icmp ult i16 %a, %b + %sel = 
select i1 %cmp, i16 %b, i16 %a + ret i16 %sel +} + +define i16 @ba_ule_i16(i16 %a, i16 %b) { +; LABEL: @ba_ule_i16 +; CHECK: max.u16 + %cmp = icmp ule i16 %a, %b + %sel = select i1 %cmp, i16 %b, i16 %a + ret i16 %sel +} + +; *** ba, signed, i16 +define i16 @ba_sgt_i16(i16 %a, i16 %b) { +; LABEL: @ba_sgt_i16 +; CHECK: min.s16 + %cmp = icmp sgt i16 %a, %b + %sel = select i1 %cmp, i16 %b, i16 %a + ret i16 %sel +} + +define i16 @ba_sge_i16(i16 %a, i16 %b) { +; LABEL: @ba_sge_i16 +; CHECK: min.s16 + %cmp = icmp sge i16 %a, %b + %sel = select i1 %cmp, i16 %b, i16 %a + ret i16 %sel +} + +define i16 @ba_slt_i16(i16 %a, i16 %b) { +; LABEL: @ba_slt_i16 +; CHECK: max.s16 + %cmp = icmp slt i16 %a, %b + %sel = select i1 %cmp, i16 %b, i16 %a + ret i16 %sel +} + +define i16 @ba_sle_i16(i16 %a, i16 %b) { +; LABEL: @ba_sle_i16 +; CHECK: max.s16 + %cmp = icmp sle i16 %a, %b + %sel = select i1 %cmp, i16 %b, i16 %a + ret i16 %sel +} ; ************************************* ; * All variations with i32