Index: llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp @@ -950,9 +950,7 @@ SmallVector<double, 0> DVal; for (int i=0; i < getVecSize(FInfo); ++i) { - double V = (getArgType(FInfo) == AMDGPULibFunc::F32) - ? (double)CDV->getElementAsFloat(i) - : CDV->getElementAsDouble(i); + double V = CDV->getElementAsAPFloat(i).convertToDouble(); if (V < 0.0) needcopysign = true; V = log2(std::abs(V)); DVal.push_back(V); @@ -986,9 +984,7 @@ } else { if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1)) { for (int i=0; i < getVecSize(FInfo); ++i) { - double y = (getArgType(FInfo) == AMDGPULibFunc::F32) - ? (double)CDV->getElementAsFloat(i) - : CDV->getElementAsDouble(i); + double y = CDV->getElementAsAPFloat(i).convertToDouble(); if (y != (double)(int64_t)y) return false; } Index: llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll +++ llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll @@ -434,6 +434,39 @@ ret void } +declare half @_Z3powDhDh(half, half) +declare <2 x half> @_Z3powDv2_DhS_(<2 x half>, <2 x half>) + +; GCN-LABEL: define half @test_pow_fast_f16__y_13(half %x) +; GCN-PRELINK: %__fabs = tail call fast half @llvm.fabs.f16(half %x) +; GCN-PRELINK: %__log2 = tail call fast half @_Z4log2Dh(half %__fabs) +; GCN-PRELINK: %__ylogx = fmul fast half %__log2, 0xH4A80 +; GCN-PRELINK: %__exp2 = tail call fast half @_Z4exp2Dh(half %__ylogx) +; GCN-PRELINK: %1 = bitcast half %x to i16 +; GCN-PRELINK: %__pow_sign = and i16 %1, -32768 +; GCN-PRELINK: %2 = bitcast half %__exp2 to i16 +; GCN-PRELINK: %3 = or i16 %__pow_sign, %2 +; GCN-PRELINK: %4 = bitcast i16 %3 to half +define half @test_pow_fast_f16__y_13(half %x) { + %powr = tail call fast half @_Z3powDhDh(half %x, half 13.0) + ret half %powr +} + +; GCN-LABEL: define <2 x half> 
@test_pow_fast_v2f16__y_13(<2 x half> %x) +; GCN-PRELINK: %__fabs = tail call fast <2 x half> @llvm.fabs.v2f16(<2 x half> %x) +; GCN-PRELINK: %__log2 = tail call fast <2 x half> @_Z4log2Dv2_Dh(<2 x half> %__fabs) +; GCN-PRELINK: %__ylogx = fmul fast <2 x half> %__log2, <half 0xH4A80, half 0xH4A80> +; GCN-PRELINK: %__exp2 = tail call fast <2 x half> @_Z4exp2Dv2_Dh(<2 x half> %__ylogx) +; GCN-PRELINK: %1 = bitcast <2 x half> %x to <2 x i16> +; GCN-PRELINK: %__pow_sign = and <2 x i16> %1, <i16 -32768, i16 -32768> +; GCN-PRELINK: %2 = bitcast <2 x half> %__exp2 to <2 x i16> +; GCN-PRELINK: %3 = or <2 x i16> %__pow_sign, %2 +; GCN-PRELINK: %4 = bitcast <2 x i16> %3 to <2 x half> +define <2 x half> @test_pow_fast_v2f16__y_13(<2 x half> %x) { + %powr = tail call fast <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> <half 13.0, half 13.0>) + ret <2 x half> %powr +} + ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_1 ; GCN: %tmp = load float, ptr addrspace(1) %arrayidx, align 4 ; GCN: store float %tmp, ptr addrspace(1) %a, align 4