Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -78,7 +78,6 @@
   bool shouldCombineMemoryType(EVT VT) const;
   SDValue performLoadCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;
-  SDValue performClampCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performAssertSZExtCombine(SDNode *N, DAGCombinerInfo &DCI) const;
 
   SDValue splitBinaryBitConstantOpImpl(DAGCombinerInfo &DCI, const SDLoc &SL,
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2898,28 +2898,6 @@
                      SN->getBasePtr(), SN->getMemOperand());
 }
 
-SDValue AMDGPUTargetLowering::performClampCombine(SDNode *N,
-                                                  DAGCombinerInfo &DCI) const {
-  ConstantFPSDNode *CSrc = dyn_cast<ConstantFPSDNode>(N->getOperand(0));
-  if (!CSrc)
-    return SDValue();
-
-  const APFloat &F = CSrc->getValueAPF();
-  APFloat Zero = APFloat::getZero(F.getSemantics());
-  APFloat::cmpResult Cmp0 = F.compare(Zero);
-  if (Cmp0 == APFloat::cmpLessThan ||
-      (Cmp0 == APFloat::cmpUnordered && Subtarget->enableDX10Clamp())) {
-    return DCI.DAG.getConstantFP(Zero, SDLoc(N), N->getValueType(0));
-  }
-
-  APFloat One(F.getSemantics(), "1.0");
-  APFloat::cmpResult Cmp1 = F.compare(One);
-  if (Cmp1 == APFloat::cmpGreaterThan)
-    return DCI.DAG.getConstantFP(One, SDLoc(N), N->getValueType(0));
-
-  return SDValue(CSrc, 0);
-}
-
 // FIXME: This should go in generic DAG combiner with an isTruncateFree check,
 // but isTruncateFree is inaccurate for i16 now because of SALU vs. VALU
 // issues.
@@ -3960,8 +3938,6 @@
     return performLoadCombine(N, DCI);
   case ISD::STORE:
     return performStoreCombine(N, DCI);
-  case AMDGPUISD::CLAMP:
-    return performClampCombine(N, DCI);
   case AMDGPUISD::RCP: {
     if (const auto *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0))) {
       // XXX - Should this flush denormals?
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -168,8 +168,6 @@
   [SDNPCommutative, SDNPAssociative]
 >;
 
-def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp>;
-
 // out = min(a, b) a and b are floats, where a nan comparison fails.
 def AMDGPUfmin_legacy : SDNode<"AMDGPUISD::FMIN_LEGACY", SDTFPBinOp,
   []
Index: llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -287,13 +287,6 @@
       return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
     }
     break;
-  case AMDGPU::CLAMP_R600: {
-    MachineInstr *NewMI = TII->buildDefaultInstruction(
-        *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(),
-        MI.getOperand(1).getReg());
-    TII->addFlag(*NewMI, 0, MO_FLAG_CLAMP);
-    break;
-  }
 
   case AMDGPU::FABS_R600: {
     MachineInstr *NewMI = TII->buildDefaultInstruction(
@@ -2180,20 +2173,6 @@
       if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
     }
-  } else if (Opcode == AMDGPU::CLAMP_R600) {
-    SDValue Src = Node->getOperand(0);
-    if (!Src.isMachineOpcode() ||
-        !TII->hasInstrModifiers(Src.getMachineOpcode()))
-      return Node;
-    int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
-                                      AMDGPU::OpName::clamp);
-    if (ClampIdx < 0)
-      return Node;
-    SDLoc DL(Node);
-    std::vector<SDValue> Ops(Src->op_begin(), Src->op_end());
-    Ops[ClampIdx - 1] = DAG.getTargetConstant(1, DL, MVT::i32);
-    return DAG.getMachineNode(Src.getMachineOpcode(), DL,
-                              Node->getVTList(), Ops);
   } else {
     if (!TII->hasInstrModifiers(Opcode))
       return Node;
Index: llvm/trunk/lib/Target/AMDGPU/R600Instructions.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600Instructions.td
+++ llvm/trunk/lib/Target/AMDGPU/R600Instructions.td
@@ -663,13 +663,6 @@
 
 let usesCustomInserter = 1 in {
 
-class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
-  (outs rc:$dst),
-  (ins rc:$src0),
-  "CLAMP $dst, $src0",
-  [(set f32:$dst, (AMDGPUclamp f32:$src0))]
->;
-
 class FABS <RegisterClass rc> : AMDGPUShaderInst <
   (outs rc:$dst),
   (ins rc:$src0),
@@ -1194,7 +1187,6 @@
   let Itinerary = TransALU;
 }
 
-def CLAMP_R600 : CLAMP <R600_Reg32>;
 def FABS_R600 : FABS<R600_Reg32>;
 def FNEG_R600 : FNEG<R600_Reg32>;
 
Index: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
@@ -131,6 +131,7 @@
   SDValue performFSubCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performCvtF32UByteNCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+  SDValue performClampCombine(SDNode *N, DAGCombinerInfo &DCI) const;
 
   bool isLegalFlatAddressingMode(const AddrMode &AM) const;
   bool isLegalGlobalAddressingMode(const AddrMode &AM) const;
Index: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -7087,6 +7087,29 @@
   return SDValue();
 }
 
+SDValue SITargetLowering::performClampCombine(SDNode *N,
+                                              DAGCombinerInfo &DCI) const {
+  ConstantFPSDNode *CSrc = dyn_cast<ConstantFPSDNode>(N->getOperand(0));
+  if (!CSrc)
+    return SDValue();
+
+  const APFloat &F = CSrc->getValueAPF();
+  APFloat Zero = APFloat::getZero(F.getSemantics());
+  APFloat::cmpResult Cmp0 = F.compare(Zero);
+  if (Cmp0 == APFloat::cmpLessThan ||
+      (Cmp0 == APFloat::cmpUnordered && Subtarget->enableDX10Clamp())) {
+    return DCI.DAG.getConstantFP(Zero, SDLoc(N), N->getValueType(0));
+  }
+
+  APFloat One(F.getSemantics(), "1.0");
+  APFloat::cmpResult Cmp1 = F.compare(One);
+  if (Cmp1 == APFloat::cmpGreaterThan)
+    return DCI.DAG.getConstantFP(One, SDLoc(N), N->getValueType(0));
+
+  return SDValue(CSrc, 0);
+}
+
+
 SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
   switch (N->getOpcode()) {
@@ -7179,6 +7202,8 @@
     return performFMed3Combine(N, DCI);
   case AMDGPUISD::CVT_PKRTZ_F16_F32:
     return performCvtPkRTZCombine(N, DCI);
+  case AMDGPUISD::CLAMP:
+    return performClampCombine(N, DCI);
   case ISD::SCALAR_TO_VECTOR: {
     SelectionDAG &DAG = DCI.DAG;
     EVT VT = N->getValueType(0);
Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
@@ -33,6 +33,8 @@
 // SI DAG Nodes
 //===----------------------------------------------------------------------===//
 
+def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp>;
+
 def SIload_constant : SDNode<"AMDGPUISD::LOAD_CONSTANT",
   SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisVT<1, v4i32>, SDTCisVT<2, i32>]>,
   [SDNPMayLoad, SDNPMemOperand]
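
Note (not part of the patch): for readers skimming the diff, the constant-folding rules that the relocated performClampCombine applies can be summarized by the standalone C++ sketch below. It models the APFloat comparisons with plain float and std::isnan, and foldClampConstant is a hypothetical helper name for illustration, not an LLVM API.

#include <cmath>
#include <cstdio>

// Assumed model of SITargetLowering::performClampCombine on a constant
// operand: fold to 0.0 when the constant compares below zero (or is a NaN
// under DX10 clamp semantics), fold to 1.0 when it compares above one, and
// otherwise replace the clamp with the source constant itself -- mirroring
// the "return SDValue(CSrc, 0)" case, so a NaN without DX10 clamp stays NaN.
static float foldClampConstant(float F, bool DX10Clamp) {
  if (F < 0.0f || (std::isnan(F) && DX10Clamp))
    return 0.0f;
  if (F > 1.0f)
    return 1.0f;
  return F;
}

int main() {
  std::printf("%g\n", foldClampConstant(-2.5f, true)); // 0
  std::printf("%g\n", foldClampConstant(3.0f, true));  // 1
  std::printf("%g\n", foldClampConstant(0.25f, true)); // 0.25
  std::printf("%g\n", foldClampConstant(NAN, true));   // 0 under DX10 clamp
  std::printf("%g\n", foldClampConstant(NAN, false));  // NaN is kept
}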