Index: lib/Target/AMDGPU/AMDGPUISelLowering.h =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.h +++ lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -53,6 +53,7 @@ SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFLOG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const; Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -13,6 +13,10 @@ // //===----------------------------------------------------------------------===// +// ln(2) and ln(10) as f32 literals; LowerFLOG uses them to rescale FLOG2. +#define AMDGPU_LN2_F 0.693147180559945309417232121458176568f +#define AMDGPU_LN10_F 2.30258509299404568401799145468436421f + #include "AMDGPUISelLowering.h" #include "AMDGPU.h" #include "AMDGPUCallLowering.h" @@ -260,6 +264,14 @@ setOperationAction(ISD::FROUND, MVT::f32, Custom); setOperationAction(ISD::FROUND, MVT::f64, Custom); + setOperationAction(ISD::FLOG, MVT::f32, Custom); + setOperationAction(ISD::FLOG10, MVT::f32, Custom); + + if (Subtarget->has16BitInsts()) { + setOperationAction(ISD::FLOG, MVT::f16, Custom); + setOperationAction(ISD::FLOG10, MVT::f16, Custom); + } + setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom); setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom); @@ -429,6 +441,8 @@ setOperationAction(ISD::FEXP2, VT, Expand); setOperationAction(ISD::FLOG2, VT, Expand); setOperationAction(ISD::FREM, VT, Expand); + setOperationAction(ISD::FLOG, VT, Expand); + setOperationAction(ISD::FLOG10, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); setOperationAction(ISD::FFLOOR, VT, Expand); setOperationAction(ISD::FTRUNC, VT, Expand); @@ -961,6 +975,9 @@ case ISD::FNEARBYINT: return
LowerFNEARBYINT(Op, DAG); case ISD::FROUND: return LowerFROUND(Op, DAG); case ISD::FFLOOR: return LowerFFLOOR(Op, DAG); + case ISD::FLOG: + case ISD::FLOG10: + return LowerFLOG(Op, DAG); case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); case ISD::FP_TO_FP16: return LowerFP_TO_FP16(Op, DAG); @@ -1875,6 +1892,30 @@ return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add); } +// Lower FLOG/FLOG10 to FLOG2 scaled by a constant: ln(x) = log2(x) * ln(2) +// and log10(x) = log2(x) * (ln(2) / ln(10)). Multiplying by the inverted +// log2 of the base avoids emitting an FDIV by a compile-time constant. +SDValue AMDGPUTargetLowering::LowerFLOG(SDValue Op, SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + SDLoc SL(Op); + + SDValue Log2Operand = DAG.getNode(ISD::FLOG2, SL, VT, Op.getOperand(0)); + + SDValue InvLog2Base; + switch (Op.getOpcode()) { + case ISD::FLOG: + InvLog2Base = DAG.getConstantFP(AMDGPU_LN2_F, SL, VT); + break; + case ISD::FLOG10: + InvLog2Base = DAG.getConstantFP(AMDGPU_LN2_F / AMDGPU_LN10_F, SL, VT); + break; + default: + llvm_unreachable("Wrong log opcode"); + } + + return DAG.getNode(ISD::FMUL, SL, VT, Log2Operand, InvLog2Base); +} + SDValue AMDGPUTargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) const { SDLoc SL(Op); SDValue Src = Op.getOperand(0); Index: test/CodeGen/AMDGPU/llvm.log.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.log.ll +++ test/CodeGen/AMDGPU/llvm.log.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=amdgcn | FileCheck %s --check-prefix=SI --check-prefix=FUNC -; RUN: llc < %s -march=amdgcn -mcpu=tonga | FileCheck %s --check-prefix=SI --check-prefix=FUNC +; RUN: llc < %s -march=amdgcn | FileCheck %s --check-prefix=GCN --check-prefix=FUNC +; RUN: llc < %s -march=amdgcn -mcpu=tonga | FileCheck %s --check-prefix=GCN --check-prefix=FUNC ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM --check-prefix=FUNC @@ -9,14 +9,13 @@ ; CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) ; CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}}
(MASKED) ; CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} -; SI: v_log_f32 -; SI: v_div_scale_f32 -; SI: v_div_scale_f32 +; GCN: v_log_f32 +; GCN: v_mul_f32 define void @test(float addrspace(1)* %out, float %in) { entry: - %0 = call float @llvm.log.f32(float %in) - store float %0, float addrspace(1)* %out + %res = call float @llvm.log.f32(float %in) + store float %res, float addrspace(1)* %out ret void } @@ -33,17 +32,15 @@ ; CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) ; CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} ; CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} -; SI: v_log_f32 -; SI: v_div_scale_f32 -; SI: v_div_scale_f32 -; SI: v_log_f32 -; SI: v_div_scale_f32 -; SI: v_div_scale_f32 +; GCN-DAG: v_log_f32 +; GCN-DAG: v_log_f32 +; GCN-DAG: v_mul_f32 +; GCN-DAG: v_mul_f32 define void @testv2(<2 x float> addrspace(1)* %out, <2 x float> %in) { entry: - %0 = call <2 x float> @llvm.log.v2f32(<2 x float> %in) - store <2 x float> %0, <2 x float> addrspace(1)* %out + %res = call <2 x float> @llvm.log.v2f32(<2 x float> %in) + store <2 x float> %res, <2 x float> addrspace(1)* %out ret void } @@ -70,22 +67,18 @@ ; CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} ; CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} ; CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} -; SI: v_log_f32 -; SI: v_div_scale_f32 -; SI: v_div_scale_f32 -; SI: v_log_f32 -; SI: v_div_scale_f32 -; SI: v_div_scale_f32 -; SI: v_log_f32 -; SI: v_div_scale_f32 -; SI: v_div_scale_f32 -; SI: v_log_f32 -; SI: v_div_scale_f32 -; SI: v_div_scale_f32 +; GCN-DAG: v_log_f32 +; GCN-DAG: v_log_f32 +; GCN-DAG: v_log_f32 +; GCN-DAG: v_log_f32 +; GCN-DAG: v_mul_f32 +; GCN-DAG: v_mul_f32 +; GCN-DAG: v_mul_f32 +; GCN-DAG: v_mul_f32 define void @testv4(<4 x float> addrspace(1)* %out, <4 x float> %in) { entry: - %0 = call <4 x float> @llvm.log.v4f32(<4 x float> %in) - store <4 x float> %0, <4 x float> addrspace(1)* %out + %res
= call <4 x float> @llvm.log.v4f32(<4 x float> %in) + store <4 x float> %res, <4 x float> addrspace(1)* %out ret void } Index: test/CodeGen/AMDGPU/llvm.log10.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.log10.ll +++ test/CodeGen/AMDGPU/llvm.log10.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=amdgcn | FileCheck %s --check-prefix=SI --check-prefix=FUNC -; RUN: llc < %s -march=amdgcn -mcpu=tonga | FileCheck %s --check-prefix=SI --check-prefix=FUNC +; RUN: llc < %s -march=amdgcn | FileCheck %s --check-prefix=GCN --check-prefix=FUNC +; RUN: llc < %s -march=amdgcn -mcpu=tonga | FileCheck %s --check-prefix=GCN --check-prefix=FUNC ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM --check-prefix=FUNC @@ -9,14 +9,13 @@ ; CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) ; CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) ; CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} -; SI: v_log_f32 -; SI: v_div_scale_f32 -; SI: v_div_scale_f32 +; GCN: v_log_f32 +; GCN: v_mul_f32 define void @test(float addrspace(1)* %out, float %in) { entry: - %0 = call float @llvm.log10.f32(float %in) - store float %0, float addrspace(1)* %out + %res = call float @llvm.log10.f32(float %in) + store float %res, float addrspace(1)* %out ret void } @@ -33,17 +32,15 @@ ; CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) ; CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} ; CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} -; SI: v_log_f32 -; SI: v_div_scale_f32 -; SI: v_div_scale_f32 -; SI: v_log_f32 -; SI: v_div_scale_f32 -; SI: v_div_scale_f32 +; GCN-DAG: v_log_f32 +; GCN-DAG: v_log_f32 +; GCN-DAG: v_mul_f32 +; GCN-DAG: v_mul_f32 define void @testv2(<2 x float> addrspace(1)* %out, <2 x float> %in) { entry: - %0 = call <2 x float> @llvm.log10.v2f32(<2 x float> %in) - store <2 x float> %0, <2 x float> addrspace(1)*
%out + %res = call <2 x float> @llvm.log10.v2f32(<2 x float> %in) + store <2 x float> %res, <2 x float> addrspace(1)* %out ret void } @@ -70,22 +67,18 @@ ; CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} ; CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} ; CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} -; SI: v_log_f32 -; SI: v_div_scale_f32 -; SI: v_div_scale_f32 -; SI: v_log_f32 -; SI: v_div_scale_f32 -; SI: v_div_scale_f32 -; SI: v_log_f32 -; SI: v_div_scale_f32 -; SI: v_div_scale_f32 -; SI: v_log_f32 -; SI: v_div_scale_f32 -; SI: v_div_scale_f32 +; GCN-DAG: v_log_f32 +; GCN-DAG: v_log_f32 +; GCN-DAG: v_log_f32 +; GCN-DAG: v_log_f32 +; GCN-DAG: v_mul_f32 +; GCN-DAG: v_mul_f32 +; GCN-DAG: v_mul_f32 +; GCN-DAG: v_mul_f32 define void @testv4(<4 x float> addrspace(1)* %out, <4 x float> %in) { entry: - %0 = call <4 x float> @llvm.log10.v4f32(<4 x float> %in) - store <4 x float> %0, <4 x float> addrspace(1)* %out + %res = call <4 x float> @llvm.log10.v4f32(<4 x float> %in) + store <4 x float> %res, <4 x float> addrspace(1)* %out ret void }