R600/SI: select AMDGPUclamp(x, 0.0, 1.0) straight to V_ADD_F32_e64 with clamp set.

Summary of the hunks below:
  * AMDGPUISelDAGToDAG.cpp: adds SelectVOP3Mods0Clamp, which forwards to
    SelectVOP3Mods for the source / source-modifier operands and always
    produces 0 for omod (see the FIXME in the function body).
  * SIInstrInfo.td / SIInstructions.td: adds the VOP3Mods0Clamp complex
    pattern and uses it so the clamp pattern emits V_ADD_F32_e64 with the
    clamp operand hard-coded to 1, replacing the FCLAMP_SI output.
  * SIISelLowering.cpp: drops the FCLAMP_SI case from the custom inserter.
  * llvm.AMDGPU.clamp.ll: adds tests for fabs, fneg and fneg(fabs) sources.

NOTE(review): none of the hunks shown here removes the FCLAMP_SI definition
itself; confirm whether it is deleted elsewhere or left unused.
NOTE(review): this text was pasted with its line structure and every <...>
span missing. Line breaks and indentation were rebuilt to match the @@ line
counts; the two static_cast target types were re-added from the declared
variable types. The ComplexPattern defs in the SIInstrInfo.td hunk still
lack their template arguments and must be restored from the tree before
this applies. Per the code in this patch, VOP3Mods0Clamp needs selector
"SelectVOP3Mods0Clamp" and three result operands (src, src modifiers, omod).

Index: lib/Target/R600/AMDGPUISelDAGToDAG.cpp
===================================================================
--- lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -111,6 +111,9 @@
   bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp, SDValue &Omod) const;
 
+  bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
+                            SDValue &Omod) const;
+
   SDNode *SelectADD_SUB_I64(SDNode *N);
   SDNode *SelectDIV_SCALE(SDNode *N);
 
@@ -1122,6 +1125,15 @@
   return SelectVOP3Mods(In, Src, SrcMods);
 }
 
+bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
+                                              SDValue &SrcMods,
+                                              SDValue &Omod) const {
+  // FIXME: Handle Omod
+  Omod = CurDAG->getTargetConstant(0, MVT::i32);
+
+  return SelectVOP3Mods(In, Src, SrcMods);
+}
+
 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
   const AMDGPUTargetLowering& Lowering =
     *static_cast<const AMDGPUTargetLowering *>(getTargetLowering());
Index: lib/Target/R600/SIISelLowering.cpp
===================================================================
--- lib/Target/R600/SIISelLowering.cpp
+++ lib/Target/R600/SIISelLowering.cpp
@@ -676,20 +676,6 @@
     MI->eraseFromParent();
     break;
   }
-  case AMDGPU::FCLAMP_SI: {
-    const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
-        getTargetMachine().getSubtargetImpl()->getInstrInfo());
-    DebugLoc DL = MI->getDebugLoc();
-    unsigned DestReg = MI->getOperand(0).getReg();
-    BuildMI(*BB, I, DL, TII->get(AMDGPU::V_ADD_F32_e64), DestReg)
-      .addImm(0) // SRC0 modifiers
-      .addOperand(MI->getOperand(1))
-      .addImm(0) // SRC1 modifiers
-      .addImm(0) // SRC1
-      .addImm(1) // CLAMP
-      .addImm(0); // OMOD
-    MI->eraseFromParent();
-  }
   }
   return BB;
 }
Index: lib/Target/R600/SIInstrInfo.td
===================================================================
--- lib/Target/R600/SIInstrInfo.td
+++ lib/Target/R600/SIInstrInfo.td
@@ -245,6 +245,7 @@
 def MUBUFOffsetAtomic : ComplexPattern;
 
 def VOP3Mods0 : ComplexPattern;
+def VOP3Mods0Clamp : ComplexPattern;
 def VOP3Mods : ComplexPattern;
 
 //===----------------------------------------------------------------------===//
Index: lib/Target/R600/SIInstructions.td
===================================================================
--- lib/Target/R600/SIInstructions.td
+++ lib/Target/R600/SIInstructions.td
@@ -2415,8 +2415,8 @@
 }
 
 def : Pat <
-  (AMDGPUclamp f32:$src, (f32 FP_ZERO), (f32 FP_ONE)),
-  (FCLAMP_SI f32:$src)
+  (AMDGPUclamp (VOP3Mods0Clamp f32:$src0, i32:$src0_modifiers, i32:$omod), (f32 FP_ZERO), (f32 FP_ONE)),
+  (V_ADD_F32_e64 $src0_modifiers, $src0, 0, 0, 1, $omod)
 >;
 
 /********** ================================ **********/
Index: test/CodeGen/R600/llvm.AMDGPU.clamp.ll
===================================================================
--- test/CodeGen/R600/llvm.AMDGPU.clamp.ll
+++ test/CodeGen/R600/llvm.AMDGPU.clamp.ll
@@ -1,6 +1,7 @@
 ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
+declare float @llvm.fabs.f32(float) nounwind readnone
 declare float @llvm.AMDGPU.clamp.f32(float, float, float) nounwind readnone
 declare float @llvm.AMDIL.clamp.f32(float, float, float) nounwind readnone
 
@@ -17,6 +18,43 @@
   ret void
 }
 
+; FUNC-LABEL: {{^}}clamp_fabs_0_1_f32:
+; SI: S_LOAD_DWORD [[ARG:s[0-9]+]],
+; SI: V_ADD_F32_e64 [[RESULT:v[0-9]+]], |[[ARG]]|, 0 clamp{{$}}
+; SI: BUFFER_STORE_DWORD [[RESULT]]
+; SI: S_ENDPGM
+define void @clamp_fabs_0_1_f32(float addrspace(1)* %out, float %src) nounwind {
+  %src.fabs = call float @llvm.fabs.f32(float %src) nounwind readnone
+  %clamp = call float @llvm.AMDGPU.clamp.f32(float %src.fabs, float 0.0, float 1.0) nounwind readnone
+  store float %clamp, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}clamp_fneg_0_1_f32:
+; SI: S_LOAD_DWORD [[ARG:s[0-9]+]],
+; SI: V_ADD_F32_e64 [[RESULT:v[0-9]+]], -[[ARG]], 0 clamp{{$}}
+; SI: BUFFER_STORE_DWORD [[RESULT]]
+; SI: S_ENDPGM
+define void @clamp_fneg_0_1_f32(float addrspace(1)* %out, float %src) nounwind {
+  %src.fneg = fsub float -0.0, %src
+  %clamp = call float @llvm.AMDGPU.clamp.f32(float %src.fneg, float 0.0, float 1.0) nounwind readnone
+  store float %clamp, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}clamp_fneg_fabs_0_1_f32:
+; SI: S_LOAD_DWORD [[ARG:s[0-9]+]],
+; SI: V_ADD_F32_e64 [[RESULT:v[0-9]+]], -|[[ARG]]|, 0 clamp{{$}}
+; SI: BUFFER_STORE_DWORD [[RESULT]]
+; SI: S_ENDPGM
+define void @clamp_fneg_fabs_0_1_f32(float addrspace(1)* %out, float %src) nounwind {
+  %src.fabs = call float @llvm.fabs.f32(float %src) nounwind readnone
+  %src.fneg.fabs = fsub float -0.0, %src.fabs
+  %clamp = call float @llvm.AMDGPU.clamp.f32(float %src.fneg.fabs, float 0.0, float 1.0) nounwind readnone
+  store float %clamp, float addrspace(1)* %out, align 4
+  ret void
+}
+
 ; FUNC-LABEL: {{^}}clamp_0_1_amdil_legacy_f32:
 ; SI: S_LOAD_DWORD [[ARG:s[0-9]+]],
 ; SI: V_ADD_F32_e64 [[RESULT:v[0-9]+]], 0, [[ARG]] clamp{{$}}