Index: lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -134,6 +134,7 @@
 
   SDNode *SelectADD_SUB_I64(SDNode *N);
   SDNode *SelectDIV_SCALE(SDNode *N);
+  SDNode *SelectCLAMP(SDNode *N);
 
   SDNode *getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
                    uint32_t Offset, uint32_t Width);
@@ -590,6 +591,14 @@
   case AMDGPUISD::DIV_SCALE: {
     return SelectDIV_SCALE(N);
   }
+
+  case AMDGPUISD::CLAMP: {
+    SDNode *Clamp = SelectCLAMP(N);
+    if (Clamp)
+      return Clamp;
+    break;
+  }
+
   case ISD::CopyToReg: {
     const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());
@@ -866,6 +875,43 @@
   return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
 }
 
+// Fold an AMDGPUISD::CLAMP node into the clamp output-modifier operand of
+// the already-selected machine node that produces its input.  Returns the
+// selected node on success, or nullptr to fall back to tablegen selection.
+SDNode *AMDGPUDAGToDAGISel::SelectCLAMP(SDNode *N) {
+
+  SDValue N0 = N->getOperand(0);
+
+  // If the input is FABS or FNEG then we want to fallback to tablegen
+  // to make sure the clamp and the FABS or FNEG modifiers are applied
+  // to the same instructions.
+  if (!N0->hasOneUse() || N0->getOpcode() == ISD::FABS ||
+      N0->getOpcode() == ISD::FNEG)
+    return nullptr;
+
+  SDNode *SelectedN0 = SelectCode(N0.getNode());
+  if (!SelectedN0)
+    return nullptr;
+
+  int ClampIdx = AMDGPU::getNamedOperandIdx(SelectedN0->getMachineOpcode(),
+                                            AMDGPU::OpName::clamp);
+  if (ClampIdx == -1)
+    return nullptr;
+
+  // SDNodes don't have a dst operand, so we need to subtract one from the
+  // operand index.
+  ClampIdx--;
+
+  SmallVector<SDValue, 8> Ops;
+  for (int i = 0, e = SelectedN0->getNumOperands(); i != e; ++i) {
+    if (i == ClampIdx) {
+      Ops.push_back(CurDAG->getTargetConstant(1, SDLoc(N), MVT::i32));
+      continue;
+    }
+    Ops.push_back(SelectedN0->getOperand(i));
+  }
+  // UpdateNodeOperands may CSE to an existing node; use the returned node,
+  // not the one we passed in, or the clamp bit can be silently dropped.
+  SelectedN0 = CurDAG->UpdateNodeOperands(SelectedN0, Ops);
+
+  return SelectedN0;
+}
+
 bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                                          unsigned OffsetBits) const {
   if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
Index: test/CodeGen/AMDGPU/llvm.AMDGPU.clamp.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.AMDGPU.clamp.ll
+++ test/CodeGen/AMDGPU/llvm.AMDGPU.clamp.ll
@@ -65,6 +65,26 @@
   store float %clamp, float addrspace(1)* %out, align 4
   ret void
 }
+
+; FUNC-LABEL: {{^}}clamp_fold:
+; SI: v_mul_f32_e64 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}} clamp
+define void @clamp_fold(float addrspace(1)* %out, float %src) {
+  %tmp0 = fmul float %src, 10.0
+  %clamp = call float @llvm.AMDGPU.clamp.f32(float %tmp0, float 0.0, float 1.0) readnone
+  store float %clamp, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}clamp_no_fold:
+; SI: v_mul_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+$}}
+define void @clamp_no_fold(float addrspace(1)* %out, float %src) {
+  %tmp0 = fmul float %src, 10.0
+  %clamp = call float @llvm.AMDGPU.clamp.f32(float %tmp0, float 0.0, float 1.0) readnone
+  %tmp1 = fmul float %tmp0, %clamp
+  store float %tmp1, float addrspace(1)* %out, align 4
+  ret void
+}
+
 ; FUNC-LABEL: {{^}}clamp_non_standard:
 ; SI-DAG: v_max_f32
 ; SI-DAG: v_min_f32