Index: include/llvm/IR/IntrinsicsAMDGPU.td
===================================================================
--- include/llvm/IR/IntrinsicsAMDGPU.td
+++ include/llvm/IR/IntrinsicsAMDGPU.td
@@ -100,7 +100,7 @@
   [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
 >;
 
-def int_amdgcn_rsq_clamped : Intrinsic<
+def int_amdgcn_rsq_clamp : Intrinsic<
   [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
 
 def int_amdgcn_ldexp : Intrinsic<
Index: lib/Target/AMDGPU/AMDGPUISelLowering.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -269,7 +269,7 @@
   RCP,
   RSQ,
   RSQ_LEGACY,
-  RSQ_CLAMPED,
+  RSQ_CLAMP,
   LDEXP,
   FP_CLASS,
   DOT4,
Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2791,7 +2791,7 @@
   NODE_NAME_CASE(RCP)
   NODE_NAME_CASE(RSQ)
   NODE_NAME_CASE(RSQ_LEGACY)
-  NODE_NAME_CASE(RSQ_CLAMPED)
+  NODE_NAME_CASE(RSQ_CLAMP)
  NODE_NAME_CASE(LDEXP)
  NODE_NAME_CASE(FP_CLASS)
  NODE_NAME_CASE(DOT4)
Index: lib/Target/AMDGPU/AMDGPUInstrInfo.td
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -63,7 +63,7 @@
 def AMDGPUrsq_legacy : SDNode<"AMDGPUISD::RSQ_LEGACY", SDTFPUnaryOp>;
 
 // out = 1.0 / sqrt(a) result clamped to +/- max_float.
-def AMDGPUrsq_clamped : SDNode<"AMDGPUISD::RSQ_CLAMPED", SDTFPUnaryOp>;
+def AMDGPUrsq_clamp : SDNode<"AMDGPUISD::RSQ_CLAMP", SDTFPUnaryOp>;
 
 def AMDGPUldexp : SDNode<"AMDGPUISD::LDEXP", AMDGPULdExpOp>;
Index: lib/Target/AMDGPU/R600ISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/R600ISelLowering.cpp
+++ lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -802,7 +802,7 @@
 
   // FIXME: Should be renamed to r600 prefix
   case AMDGPUIntrinsic::AMDGPU_rsq_clamped:
-    return DAG.getNode(AMDGPUISD::RSQ_CLAMPED, DL, VT, Op.getOperand(1));
+    return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
 
   case Intrinsic::r600_rsq:
   case AMDGPUIntrinsic::AMDGPU_rsq: // Legacy name
Index: lib/Target/AMDGPU/R600Instructions.td
===================================================================
--- lib/Target/AMDGPU/R600Instructions.td
+++ lib/Target/AMDGPU/R600Instructions.td
@@ -1101,7 +1101,7 @@
 
 // Clamped to maximum.
 class RECIPSQRT_CLAMPED_Common inst> : R600_1OP_Helper <
-  inst, "RECIPSQRT_CLAMPED", AMDGPUrsq_clamped
+  inst, "RECIPSQRT_CLAMPED", AMDGPUrsq_clamp
 > {
   let Itinerary = TransALU;
 }
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1351,10 +1351,10 @@
   case Intrinsic::amdgcn_rsq:
   case AMDGPUIntrinsic::AMDGPU_rsq: // Legacy name
     return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
-  case Intrinsic::amdgcn_rsq_clamped:
+  case Intrinsic::amdgcn_rsq_clamp:
   case AMDGPUIntrinsic::AMDGPU_rsq_clamped: { // Legacy name
     if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
-      return DAG.getNode(AMDGPUISD::RSQ_CLAMPED, DL, VT, Op.getOperand(1));
+      return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
 
     Type *Type = VT.getTypeForEVT(*DAG.getContext());
     APFloat Max = APFloat::getLargest(Type->getFltSemantics());
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -1352,7 +1352,7 @@
 defm V_RCP_CLAMP_F32 : VOP1InstSI , "v_rcp_clamp_f32", VOP_F32_F32>;
 defm V_RCP_LEGACY_F32 : VOP1InstSI , "v_rcp_legacy_f32", VOP_F32_F32>;
 defm V_RSQ_CLAMP_F32 : VOP1InstSI , "v_rsq_clamp_f32",
-  VOP_F32_F32, AMDGPUrsq_clamped
+  VOP_F32_F32, AMDGPUrsq_clamp
 >;
 defm V_RSQ_LEGACY_F32 : VOP1InstSI , "v_rsq_legacy_f32",
   VOP_F32_F32, AMDGPUrsq_legacy
@@ -1364,7 +1364,7 @@
 
 defm V_RCP_CLAMP_F64 : VOP1InstSI , "v_rcp_clamp_f64", VOP_F64_F64>;
 defm V_RSQ_CLAMP_F64 : VOP1InstSI , "v_rsq_clamp_f64",
-  VOP_F64_F64, AMDGPUrsq_clamped
+  VOP_F64_F64, AMDGPUrsq_clamp
 >;
 
 } // End SchedRW = [WriteDouble]
Index: test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll
@@ -0,0 +1,43 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s
+
+declare float @llvm.amdgcn.rsq.clamp.f32(float) #1
+declare double @llvm.amdgcn.rsq.clamp.f64(double) #1
+
+; FUNC-LABEL: {{^}}rsq_clamp_f32:
+; SI: v_rsq_clamp_f32_e32
+
+; VI: s_load_dword [[SRC:s[0-9]+]]
+; VI: v_rsq_f32_e32 [[RSQ:v[0-9]+]], [[SRC]]
+; VI: v_min_f32_e32 [[MIN:v[0-9]+]], 0x7f7fffff, [[RSQ]]
+; TODO: this constant should be folded:
+; VI: v_mov_b32_e32 [[K:v[0-9]+]], 0xff7fffff
+; VI: v_max_f32_e32 [[RESULT:v[0-9]+]], [[MIN]], [[K]]
+; VI: buffer_store_dword [[RESULT]]
+define void @rsq_clamp_f32(float addrspace(1)* %out, float %src) #0 {
+  %rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %src)
+  store float %rsq_clamp, float addrspace(1)* %out
+  ret void
+}
+
+
+; FUNC-LABEL: {{^}}rsq_clamp_f64:
+; SI: v_rsq_clamp_f64_e32
+
+; VI: v_rsq_f64_e32 [[RSQ:v\[[0-9]+:[0-9]+\]]], s[2:3]
+; TODO: this constant should be folded:
+; VI: s_mov_b32 s[[ALLBITS:[0-9+]]], -1
+; VI: s_mov_b32 s[[HIGH1:[0-9+]]], 0x7fefffff
+; VI: s_mov_b32 s[[LOW1:[0-9+]]], s[[ALLBITS]]
+; VI: v_min_f64 v[0:1], [[RSQ]], s{{\[}}[[LOW1]]:[[HIGH1]]]
+; VI: s_mov_b32 s[[HIGH2:[0-9+]]], 0xffefffff
+; VI: s_mov_b32 s[[LOW2:[0-9+]]], s[[ALLBITS]]
+; VI: v_max_f64 v[0:1], v[0:1], s{{\[}}[[LOW2]]:[[HIGH2]]]
+define void @rsq_clamp_f64(double addrspace(1)* %out, double %src) #0 {
+  %rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %src)
+  store double %rsq_clamp, double addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
Index: test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamped.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamped.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s
-
-declare double @llvm.amdgcn.rsq.clamped.f64(double) nounwind readnone
-
-; FUNC-LABEL: {{^}}rsq_clamped_f64:
-; SI: v_rsq_clamp_f64_e32
-
-; VI: v_rsq_f64_e32 [[RSQ:v\[[0-9]+:[0-9]+\]]], s[2:3]
-; TODO: this constant should be folded:
-; VI: s_mov_b32 s[[ALLBITS:[0-9+]]], -1
-; VI: s_mov_b32 s[[HIGH1:[0-9+]]], 0x7fefffff
-; VI: s_mov_b32 s[[LOW1:[0-9+]]], s[[ALLBITS]]
-; VI: v_min_f64 v[0:1], [[RSQ]], s{{\[}}[[LOW1]]:[[HIGH1]]]
-; VI: s_mov_b32 s[[HIGH2:[0-9+]]], 0xffefffff
-; VI: s_mov_b32 s[[LOW2:[0-9+]]], s[[ALLBITS]]
-; VI: v_max_f64 v[0:1], v[0:1], s{{\[}}[[LOW2]]:[[HIGH2]]]
-
-define void @rsq_clamped_f64(double addrspace(1)* %out, double %src) nounwind {
-  %rsq_clamped = call double @llvm.amdgcn.rsq.clamped.f64(double %src) nounwind readnone
-  store double %rsq_clamped, double addrspace(1)* %out, align 8
-  ret void
-}
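
A minimal caller sketch (not part of the patch, kernel name @use_rsq_clamp is hypothetical), assuming the renamed declarations above: IR now refers to llvm.amdgcn.rsq.clamp.*; on SI/CI this selects to v_rsq_clamp_*, while on VI the SIISelLowering.cpp path above expands it to v_rsq_* plus a min/max clamp against +/- the largest finite float, as the new test checks.

; Sketch only: one f32 call site of the renamed intrinsic.
declare float @llvm.amdgcn.rsq.clamp.f32(float) #1

define void @use_rsq_clamp(float addrspace(1)* %out, float %x) #0 {
  %r = call float @llvm.amdgcn.rsq.clamp.f32(float %x)
  store float %r, float addrspace(1)* %out
  ret void
}

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }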