Index: clang/include/clang/Basic/BuiltinsAMDGPU.def =================================================================== --- clang/include/clang/Basic/BuiltinsAMDGPU.def +++ clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -100,6 +100,7 @@ BUILTIN(__builtin_amdgcn_rsq_clampf, "ff", "nc") BUILTIN(__builtin_amdgcn_sinf, "ff", "nc") BUILTIN(__builtin_amdgcn_cosf, "ff", "nc") +BUILTIN(__builtin_amdgcn_logf, "ff", "nc") BUILTIN(__builtin_amdgcn_log_clampf, "ff", "nc") BUILTIN(__builtin_amdgcn_ldexp, "ddi", "nc") BUILTIN(__builtin_amdgcn_ldexpf, "ffi", "nc") Index: clang/lib/CodeGen/CGBuiltin.cpp =================================================================== --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -17171,6 +17171,8 @@ return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos); case AMDGPU::BI__builtin_amdgcn_dispatch_ptr: return EmitAMDGPUDispatchPtr(*this, E); + case AMDGPU::BI__builtin_amdgcn_logf: + return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log); case AMDGPU::BI__builtin_amdgcn_log_clampf: return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp); case AMDGPU::BI__builtin_amdgcn_ldexp: Index: clang/test/CodeGenOpenCL/builtins-amdgcn.cl =================================================================== --- clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -172,6 +172,13 @@ *out = __builtin_amdgcn_cosf(a); } +// CHECK-LABEL: @test_log_f32 +// CHECK: call float @llvm.amdgcn.log.f32 +void test_log_f32(global float* out, float a) +{ + *out = __builtin_amdgcn_logf(a); +} + // CHECK-LABEL: @test_log_clamp_f32 // CHECK: call float @llvm.amdgcn.log.clamp.f32 void test_log_clamp_f32(global float* out, float a) Index: llvm/docs/ReleaseNotes.rst =================================================================== --- llvm/docs/ReleaseNotes.rst +++ llvm/docs/ReleaseNotes.rst @@ -133,6 +133,9 @@ improves the interaction between AMDGPU buffer operations and the LLVM memory model, and so 
the non `.ptr` intrinsics are deprecated. +* Added llvm.amdgcn.log.f32 intrinsic. This provides direct access to + v_log_f32. + Changes to the ARM Backend -------------------------- Index: llvm/include/llvm/IR/IntrinsicsAMDGPU.td =================================================================== --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -300,6 +300,14 @@ [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; +// v_log_{f16|f32}, performs log2. f32 version does not handle +// denormals. There is no reason to use this for f16 as it does +// support denormals, and the generic log2 intrinsic should be +// preferred. +def int_amdgcn_log : DefaultAttrsIntrinsic< + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] +>; + def int_amdgcn_log_clamp : DefaultAttrsIntrinsic< [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; Index: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -444,6 +444,10 @@ RSQ, RCP_LEGACY, RCP_IFLAG, + + // log2, no denormal handling for f32. 
+ LOG, + FMUL_LEGACY, RSQ_CLAMP, FP_CLASS, Index: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -4687,6 +4687,7 @@ NODE_NAME_CASE(RSQ) NODE_NAME_CASE(RCP_LEGACY) NODE_NAME_CASE(RCP_IFLAG) + NODE_NAME_CASE(LOG) NODE_NAME_CASE(FMUL_LEGACY) NODE_NAME_CASE(RSQ_CLAMP) NODE_NAME_CASE(FP_CLASS) Index: llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -115,6 +115,9 @@ // out = 1.0 / a def AMDGPUrcp_impl : SDNode<"AMDGPUISD::RCP", SDTFPUnaryOp>; +// v_log_f32, which is log2 +def AMDGPUlog_impl : SDNode<"AMDGPUISD::LOG", SDTFPUnaryOp>; + // out = 1.0 / sqrt(a) def AMDGPUrsq_impl : SDNode<"AMDGPUISD::RSQ", SDTFPUnaryOp>; @@ -385,6 +388,12 @@ (AMDGPUcos_impl node:$src)]>; def AMDGPUfract : PatFrags<(ops node:$src), [(int_amdgcn_fract node:$src), (AMDGPUfract_impl node:$src)]>; +def AMDGPUlog : PatFrags<(ops node:$src), [(int_amdgcn_log node:$src), + (AMDGPUlog_impl node:$src), + (flog2 node:$src)]>; +def AMDGPUlogf16 : PatFrags<(ops node:$src), [(int_amdgcn_log node:$src), + (flog2 node:$src)]>; + def AMDGPUfp_class : PatFrags<(ops node:$src0, node:$src1), [(int_amdgcn_class node:$src0, node:$src1), Index: llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -4207,6 +4207,7 @@ case Intrinsic::amdgcn_sin: case Intrinsic::amdgcn_cos: case Intrinsic::amdgcn_log_clamp: + case Intrinsic::amdgcn_log: case Intrinsic::amdgcn_rcp: case Intrinsic::amdgcn_rcp_legacy: case Intrinsic::amdgcn_sqrt: Index: llvm/lib/Target/AMDGPU/R600Instructions.td 
=================================================================== --- llvm/lib/Target/AMDGPU/R600Instructions.td +++ llvm/lib/Target/AMDGPU/R600Instructions.td @@ -1124,7 +1124,7 @@ >; class LOG_IEEE_Common inst> : R600_1OP_Helper < - inst, "LOG_IEEE", flog2 + inst, "LOG_IEEE", AMDGPUlog > { let Itinerary = TransALU; } Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -10555,6 +10555,7 @@ case Intrinsic::amdgcn_rcp_legacy: case Intrinsic::amdgcn_rsq_legacy: case Intrinsic::amdgcn_trig_preop: + case Intrinsic::amdgcn_log: return true; default: break; Index: llvm/lib/Target/AMDGPU/VOP1Instructions.td =================================================================== --- llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -322,7 +322,7 @@ let TRANS = 1, SchedRW = [WriteTrans32] in { defm V_EXP_F32 : VOP1Inst <"v_exp_f32", VOP_F32_F32, fexp2>; -defm V_LOG_F32 : VOP1Inst <"v_log_f32", VOP_F32_F32, flog2>; +defm V_LOG_F32 : VOP1Inst <"v_log_f32", VOP_F32_F32, AMDGPUlog>; defm V_RCP_F32 : VOP1Inst <"v_rcp_f32", VOP_F32_F32, AMDGPUrcp>; defm V_RCP_IFLAG_F32 : VOP1Inst <"v_rcp_iflag_f32", VOP_F32_F32, AMDGPUrcp_iflag>; defm V_RSQ_F32 : VOP1Inst <"v_rsq_f32", VOP_F32_F32, AMDGPUrsq>; @@ -487,7 +487,7 @@ defm V_RCP_F16 : VOP1Inst_t16 <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>; defm V_SQRT_F16 : VOP1Inst_t16 <"v_sqrt_f16", VOP_F16_F16, any_amdgcn_sqrt>; defm V_RSQ_F16 : VOP1Inst_t16 <"v_rsq_f16", VOP_F16_F16, AMDGPUrsq>; -defm V_LOG_F16 : VOP1Inst_t16 <"v_log_f16", VOP_F16_F16, flog2>; +defm V_LOG_F16 : VOP1Inst_t16 <"v_log_f16", VOP_F16_F16, AMDGPUlogf16>; defm V_EXP_F16 : VOP1Inst_t16 <"v_exp_f16", VOP_F16_F16, fexp2>; defm V_SIN_F16 : VOP1Inst_t16 <"v_sin_f16", VOP_F16_F16, AMDGPUsin>; defm V_COS_F16 : VOP1Inst_t16 <"v_cos_f16", VOP_F16_F16, AMDGPUcos>; Index: 
llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll +++ llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll @@ -872,6 +872,16 @@ ret float %canonicalized } +; GCN-LABEL: {{^}}v_test_canonicalize_amdgcn_log: +; GCN: s_waitcnt +; GCN-NEXT: v_log_f32 +; GCN-NEXT: s_setpc_b64 +define float @v_test_canonicalize_amdgcn_log(float %a) { + %log = call float @llvm.amdgcn.log.f32(float %a) + %canonicalized = call float @llvm.canonicalize.f32(float %log) + ret float %canonicalized +} + ; Avoid failing the test on FreeBSD11.0 which will match the GCN-NOT: 1.0 ; in the .amd_amdgpu_isa "amdgcn-unknown-freebsd11.0--gfx802" directive ; GCN: .amd_amdgpu_isa @@ -900,6 +910,7 @@ declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #0 declare float @llvm.amdgcn.cubeid(float, float, float) #0 declare float @llvm.amdgcn.frexp.mant.f32(float) #0 +declare float @llvm.amdgcn.log.f32(float) #0 attributes #0 = { nounwind readnone } attributes #1 = { "no-nans-fp-math"="true" } Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.log.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.log.ll @@ -0,0 +1,79 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GISEL %s + +define float @v_log_f32(float %src) { +; GCN-LABEL: v_log_f32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_log_f32_e32 v0, v0 +; GCN-NEXT: s_setpc_b64 s[30:31] + %log = call float @llvm.amdgcn.log.f32(float %src) + ret float %log +} + +define float @v_fabs_log_f32(float %src) { +; GCN-LABEL: v_fabs_log_f32: +; GCN: ; %bb.0: +; GCN-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_log_f32_e64 v0, |v0| +; GCN-NEXT: s_setpc_b64 s[30:31] + %fabs.src = call float @llvm.fabs.f32(float %src) + %log = call float @llvm.amdgcn.log.f32(float %fabs.src) + ret float %log +} + +define float @v_fneg_fabs_log_f32(float %src) { +; GCN-LABEL: v_fneg_fabs_log_f32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_log_f32_e64 v0, -|v0| +; GCN-NEXT: s_setpc_b64 s[30:31] + %fabs.src = call float @llvm.fabs.f32(float %src) + %neg.fabs.src = fneg float %fabs.src + %log = call float @llvm.amdgcn.log.f32(float %neg.fabs.src) + ret float %log +} + +define half @v_log_f16(half %src) { +; GCN-LABEL: v_log_f16: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_log_f16_e32 v0, v0 +; GCN-NEXT: s_setpc_b64 s[30:31] + %log = call half @llvm.amdgcn.log.f16(half %src) + ret half %log +} + +define half @v_fabs_log_f16(half %src) { +; GCN-LABEL: v_fabs_log_f16: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_log_f16_e64 v0, |v0| +; GCN-NEXT: s_setpc_b64 s[30:31] + %fabs.src = call half @llvm.fabs.f16(half %src) + %log = call half @llvm.amdgcn.log.f16(half %fabs.src) + ret half %log +} + +define half @v_fneg_fabs_log_f16(half %src) { +; GCN-LABEL: v_fneg_fabs_log_f16: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_log_f16_e64 v0, -|v0| +; GCN-NEXT: s_setpc_b64 s[30:31] + %fabs.src = call half @llvm.fabs.f16(half %src) + %neg.fabs.src = fneg half %fabs.src + %log = call half @llvm.amdgcn.log.f16(half %neg.fabs.src) + ret half %log +} + +declare half @llvm.amdgcn.log.f16(half) #0 +declare float @llvm.amdgcn.log.f32(float) #0 +declare float @llvm.fabs.f32(float) #0 +declare half @llvm.fabs.f16(half) #0 + +attributes #0 = { nounwind readnone speculatable willreturn } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; GISEL: {{.*}} +; SDAG: {{.*}}