Index: libclc/clspv/lib/SOURCES =================================================================== --- libclc/clspv/lib/SOURCES +++ libclc/clspv/lib/SOURCES @@ -1,48 +1,105 @@ +math/fp16/asinh.cl +math/fp16/atanh.cl +math/fp16/cbrt.cl +math/fp16/cos.cl +math/fp16/cosh.cl +math/fp16/cospi.cl +math/fp16/exp.cl +math/fp16/exp10.cl +math/fp16/expm1.cl +math/fp16/hypot.cl +math/fp16/ldexp.cl +math/fp16/log.cl +math/fp16/log10.cl +math/fp16/pow.cl +math/fp16/pown.cl +math/fp16/powr.cl +math/fp16/rootn.cl +math/fp16/signbit.cl +math/fp16/sin.cl +math/fp16/sinh.cl +math/fp16/sinpi.cl +math/fp16/tan.cl +math/fp16/tanpi.cl +math/fp16/trig.cl +math/fp16/upcast.cl +math/nextafter.cl subnormal_config.cl +../../generic/lib/clcmacro.h ../../generic/lib/geometric/distance.cl ../../generic/lib/geometric/length.cl -math/fma.cl -math/nextafter.cl +../../generic/lib/math/acos.cl ../../generic/lib/math/acosh.cl +../../generic/lib/math/acospi.cl +../../generic/lib/math/asin.cl ../../generic/lib/math/asinh.cl +../../generic/lib/math/asinpi.cl ../../generic/lib/math/atan.cl ../../generic/lib/math/atan2.cl ../../generic/lib/math/atan2pi.cl ../../generic/lib/math/atanh.cl ../../generic/lib/math/atanpi.cl ../../generic/lib/math/cbrt.cl +../../generic/lib/math/clc_exp10.cl ../../generic/lib/math/clc_fmod.cl ../../generic/lib/math/clc_hypot.cl ../../generic/lib/math/clc_ldexp.cl ../../generic/lib/math/clc_nextafter.cl +../../generic/lib/math/clc_pow.cl +../../generic/lib/math/clc_pown.cl +../../generic/lib/math/clc_powr.cl ../../generic/lib/math/clc_remainder.cl ../../generic/lib/math/clc_remquo.cl ../../generic/lib/math/clc_rootn.cl -../../generic/lib/math/clc_sqrt.cl ../../generic/lib/math/clc_tan.cl +../../generic/lib/math/clc_tanpi.cl +../../generic/lib/math/cos.cl +../../generic/lib/math/cosh.cl +../../generic/lib/math/cospi.cl ../../generic/lib/math/erf.cl ../../generic/lib/math/erfc.cl +../../generic/lib/math/exp.cl +../../generic/lib/math/exp10.cl +../../generic/lib/math/exp2.cl 
+../../generic/lib/math/exp_helper.cl +../../generic/lib/math/expm1.cl +../../generic/lib/math/fdim.cl ../../generic/lib/math/fmod.cl ../../generic/lib/math/fract.cl ../../generic/lib/math/frexp.cl +../../generic/lib/math/half_cos.cl ../../generic/lib/math/half_divide.cl +../../generic/lib/math/half_powr.cl ../../generic/lib/math/half_recip.cl -../../generic/lib/math/half_sqrt.cl +../../generic/lib/math/half_sin.cl +../../generic/lib/math/half_tan.cl ../../generic/lib/math/hypot.cl ../../generic/lib/math/ilogb.cl ../../generic/lib/math/ldexp.cl ../../generic/lib/math/lgamma.cl ../../generic/lib/math/lgamma_r.cl +../../generic/lib/math/log.cl +../../generic/lib/math/log10.cl +../../generic/lib/math/log1p.cl +../../generic/lib/math/log2.cl ../../generic/lib/math/logb.cl ../../generic/lib/math/maxmag.cl ../../generic/lib/math/minmag.cl ../../generic/lib/math/modf.cl ../../generic/lib/math/nan.cl +../../generic/lib/math/pow.cl +../../generic/lib/math/pown.cl +../../generic/lib/math/powr.cl ../../generic/lib/math/remainder.cl ../../generic/lib/math/remquo.cl ../../generic/lib/math/rootn.cl -../../generic/lib/math/rsqrt.cl -../../generic/lib/math/sqrt.cl +../../generic/lib/math/sin.cl +../../generic/lib/math/sincos.cl +../../generic/lib/math/sincos_helpers.cl +../../generic/lib/math/sinh.cl +../../generic/lib/math/sinpi.cl ../../generic/lib/math/tables.cl +../../generic/lib/math/tan.cl ../../generic/lib/math/tanh.cl +../../generic/lib/math/tanpi.cl ../../generic/lib/math/tgamma.cl Index: libclc/clspv/lib/math/fp16/asinh.cl =================================================================== --- /dev/null +++ libclc/clspv/lib/math/fp16/asinh.cl @@ -0,0 +1,62 @@ +/* + * University of Illinois/NCSA + * Open Source License + * + * Copyright (c) 2014-2016, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * with the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the names of the LLVM Team, University of Illinois at
+ * Urbana-Champaign, nor the names of its contributors may be used to
+ * endorse or promote products derived from this Software without specific
+ * prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
+ * THE SOFTWARE.
+ */
+
+#include "fp16.h"
+#include "trig.h"
+
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+// Half-precision asinh, evaluated in float as ln(|x| + sqrt(x^2 + 1)) with
+// the sign of the argument copied onto the result.
+_CLC_DEF _CLC_OVERLOAD half asinh(half hx) {
+  float mag = (float)fabs(hx);
+  float arg = mag + sqrt(mad(mag, mag, 1.0f));
+  // 0x1.62e430p-1f ~= ln(2): turns the base-2 logarithm into a natural one.
+  half signed_log = copysign((half)(log2(arg) * 0x1.62e430p-1f), hx);
+  // Infinite and NaN arguments are handed back unchanged.
+  return isinf(hx) || isnan(hx) ? hx : signed_log;
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, half, asinh, half)
+
+#endif // cl_khr_fp16
Index: libclc/clspv/lib/math/fp16/atanh.cl
===================================================================
--- /dev/null
+++ libclc/clspv/lib/math/fp16/atanh.cl
@@ -0,0 +1,64 @@
+/*
+ * University of Illinois/NCSA
+ * Open Source License
+ *
+ * Copyright (c) 2014-2016, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * with the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the names of the LLVM Team, University of Illinois at
+ * Urbana-Champaign, nor the names of its contributors may be used to
+ * endorse or promote products derived from this Software without specific
+ * prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+ * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
+ * THE SOFTWARE.
+ */
+
+#include "fp16.h"
+#include "trig.h"
+
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+// Half-precision atanh, evaluated in float as ln((1 + |x|) / (1 - |x|)) / 2.
+_CLC_DEF _CLC_OVERLOAD half atanh(half hx) {
+  float mag = (float)fabs(hx);
+  float ratio = (1.0f + mag) * (1.f / (1.0f - mag));
+  half r = (half)(log2(ratio) * 0x1.62e430p-2f); // 0x1.62e430p-2f ~= ln(2) / 2
+  r = mag < 0x1.0p-7f ? mag : r;                 // below 2^-7 return |x| itself
+  // |x| == 1 selects PINFBITPATT_DP16; |x| > 1 or NaN selects QNANBITPATT_DP16.
+  r = mag == 1.0f ? as_half((short)PINFBITPATT_DP16) : r;
+  r = (mag > 1.0f) | isnan(mag) ? as_half((short)QNANBITPATT_DP16) : r;
+  // The sign of the argument is applied last.
+  return copysign(r, hx);
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, half, atanh, half)
+
+#endif // cl_khr_fp16
Index: libclc/clspv/lib/math/fp16/cbrt.cl
===================================================================
--- /dev/null
+++ libclc/clspv/lib/math/fp16/cbrt.cl
@@ -0,0 +1,29 @@
+#include "fp16.h"
+
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// Half-precision cbrt: call the float overload, narrow the result, then
+// replace infinite, NaN and out-of-range results with explicit bit patterns.
+_CLC_DEF _CLC_OVERLOAD half cbrt(half x) {
+  float wide = cbrt((float)x);
+  half narrow = (half)wide;
+
+  if (isinf(wide)) {
+    narrow = wide >= 0.f ? as_half((short)PINFBITPATT_DP16)
+                         : as_half((short)NINFBITPATT_DP16);
+  }
+  if (isnan(wide)) {
+    narrow = as_half((short)QNANBITPATT_DP16);
+  }
+  // A float result whose magnitude exceeds HALF_MAX is replaced as well.
+  if (!isinf(narrow) && fabs(wide) > HALF_MAX) {
+    narrow = wide >= 0.f ? as_half((ushort)PINFBITPATT_DP16)
+                         : as_half((ushort)NINFBITPATT_DP16);
+  }
+  return narrow;
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, half, cbrt, half)
+
+#endif // cl_khr_fp16
Index: libclc/clspv/lib/math/fp16/cos.cl
===================================================================
--- /dev/null
+++ libclc/clspv/lib/math/fp16/cos.cl
@@ -0,0 +1,66 @@
+/*
+ * University of Illinois/NCSA
+ * Open Source License
+ *
+ * Copyright (c) 2014-2016, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * with the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the names of the LLVM Team, University of Illinois at
+ * Urbana-Champaign, nor the names of its contributors may be used to
+ * endorse or promote products derived from this Software without specific
+ * prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+ * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
+ * THE SOFTWARE.
+ */
+
+#include "fp16.h"
+#include "trig.h"
+
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+// Half-precision cos: reduce |x| with trigred(), evaluate sincosred() on the
+// reduced value, then select sc.c or the negated sc.s by the reduction index.
+_CLC_DEF _CLC_OVERLOAD half cos(half x) {
+  half mag = fabs(x);
+  struct redret red = trigred(mag);
+  struct scret sc = sincosred(red.hi);
+  sc.s = -sc.s;
+  // Low bit of the index clear: take sc.c; set: take the negated sc.s.
+  short bits = as_short(((red.i & 1) == 0 ? sc.c : sc.s));
+  bits ^= red.i > 1 ? (short)0x8000 : (short)0; // index above 1: toggle bit 15
+  // A non-finite argument is replaced by the QNANBITPATT_DP16 pattern.
+  bits = isfinite(mag) ? bits : (short)QNANBITPATT_DP16;
+  return as_half(bits);
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, cos, half);
+
+#endif // cl_khr_fp16
Index: libclc/clspv/lib/math/fp16/cosh.cl
===================================================================
--- /dev/null
+++ libclc/clspv/lib/math/fp16/cosh.cl
@@ -0,0 +1,62 @@
+/*
+ * University of Illinois/NCSA
+ * Open Source License
+ *
+ * Copyright (c) 2014-2016, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * with the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the names of the LLVM Team, University of Illinois at
+ * Urbana-Champaign, nor the names of its contributors may be used to
+ * endorse or promote products derived from this Software without specific
+ * prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
+ * THE SOFTWARE.
+ */
+
+#include "fp16.h"
+#include "trig.h"
+
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+// Half-precision cosh, evaluated in float as (2^t + 2^-t) / 2, t = x * log2(e).
+half _CLC_DEF _CLC_OVERLOAD cosh(half hx) {
+  float scaled = (float)hx * 0x1.715476p+0f; // 0x1.715476p+0f ~= log2(e)
+  float wide = (0.5f * (exp2(scaled) + exp2(-scaled)));
+  half narrow = (half)wide;
+  // A float result at or above HALF_MAX takes the PINFBITPATT_DP16 pattern.
+  narrow = wide >= HALF_MAX ? as_half((short)PINFBITPATT_DP16) : narrow;
+  // A NaN argument takes the QNANBITPATT_DP16 pattern.
+  return isnan(hx) ? as_half((short)QNANBITPATT_DP16) : narrow;
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, cosh, half);
+
+#endif // cl_khr_fp16
Index: libclc/clspv/lib/math/fp16/cospi.cl
===================================================================
--- /dev/null
+++ libclc/clspv/lib/math/fp16/cospi.cl
@@ -0,0 +1,64 @@
+/*
+ * University of Illinois/NCSA
+ * Open Source License
+ *
+ * Copyright (c) 2014-2016, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * with the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the names of the LLVM Team, University of Illinois at
+ * Urbana-Champaign, nor the names of its contributors may be used to
+ * endorse or promote products derived from this Software without specific
+ * prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
+ * THE SOFTWARE.
+ */
+
+#include "fp16.h"
+#include "trig.h"
+
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+// Half-precision cospi: reduce |x| with trigpired(), evaluate sincospired() on
+// the reduced value, then select sc.c or the negated sc.s by the index.
+_CLC_DEF _CLC_OVERLOAD half cospi(half x) {
+  half mag = fabs(x);
+  struct redret red = trigpired(mag);
+  struct scret sc = sincospired(red.hi);
+  sc.s = -sc.s;
+  short bits = as_short((red.i & (short)1) == (short)0 ? sc.c : sc.s);
+  bits ^= red.i > (short)1 ? (short)0x8000 : (short)0; // toggle bit 15
+  bits = isfinite(mag) ? bits : (short)QNANBITPATT_DP16;
+  return as_half(bits);
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, half, cospi, half)
+
+#endif // cl_khr_fp16
Index: libclc/clspv/lib/math/fp16/exp.cl
===================================================================
--- /dev/null
+++ libclc/clspv/lib/math/fp16/exp.cl
@@ -0,0 +1,61 @@
+/*
+ * University of Illinois/NCSA
+ * Open Source License
+ *
+ * Copyright (c) 2014-2016, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * with the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the names of the LLVM Team, University of Illinois at
+ * Urbana-Champaign, nor the names of its contributors may be used to
+ * endorse or promote products derived from this Software without specific
+ * prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+ * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
+ * THE SOFTWARE.
+ */
+
+#include "fp16.h"
+
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+// Half-precision exp as 2^(x * log2(e)), 0x1.715476p+0f ~= log2(e). The float
+// result is narrowed to half before the HALF_MAX comparison is made.
+half _CLC_DEF _CLC_OVERLOAD exp(half x) {
+  float rounded = (half)exp2((float)x * 0x1.715476p+0f);
+  half out = (half)rounded;
+  if (fabs(rounded) >= HALF_MAX) {
+    out = copysign(as_half((short)PINFBITPATT_DP16), out);
+  }
+  return isnan(x) ? as_half((short)QNANBITPATT_DP16) : out;
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, exp, half);
+
+#endif // cl_khr_fp16
Index: libclc/clspv/lib/math/fp16/exp10.cl
===================================================================
--- /dev/null
+++ libclc/clspv/lib/math/fp16/exp10.cl
@@ -0,0 +1,61 @@
+/*
+ * University of Illinois/NCSA
+ * Open Source License
+ *
+ * Copyright (c) 2014-2016, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * with the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the names of the LLVM Team, University of Illinois at
+ * Urbana-Champaign, nor the names of its contributors may be used to
+ * endorse or promote products derived from this Software without specific
+ * prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
+ * THE SOFTWARE.
+ */
+
+#include "fp16.h"
+
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+// Half-precision exp10 as 2^(x * log2(10)), 0x1.a934f0p+1f ~= log2(10). The
+// float result is narrowed to half before the HALF_MAX comparison is made.
+half _CLC_DEF _CLC_OVERLOAD exp10(half x) {
+  float rounded = (half)exp2((float)x * 0x1.a934f0p+1f);
+  half out = (half)rounded;
+  if (fabs(rounded) >= HALF_MAX) {
+    out = copysign(as_half((short)PINFBITPATT_DP16), out);
+  }
+  return isnan(x) ? as_half((short)QNANBITPATT_DP16) : out;
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, exp10, half);
+
+#endif // cl_khr_fp16
Index: libclc/clspv/lib/math/fp16/expm1.cl
===================================================================
--- /dev/null
+++ libclc/clspv/lib/math/fp16/expm1.cl
@@ -0,0 +1,63 @@
+/*
+ * University of Illinois/NCSA
+ * Open Source License
+ *
+ * Copyright (c) 2014-2016, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * with the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the names of the LLVM Team, University of Illinois at
+ * Urbana-Champaign, nor the names of its contributors may be used to
+ * endorse or promote products derived from this Software without specific
+ * prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
+ * THE SOFTWARE.
+ */
+
+#include "fp16.h"
+
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+// Half-precision expm1: 2^(x * log2(e)) - 1 in float, except that for
+// |x| < 2^-6 the polynomial x + x^2 * (1/2 + x/6) is used instead.
+half _CLC_DEF _CLC_OVERLOAD expm1(half x) {
+  float wide = exp2((float)x * 0x1.715476p+0f) - 1.0f;
+  half poly = fma(x, x * fma(x, 0x1.555556p-3h, 0.5h), x);
+  half out = fabs(x) < 0x1.0p-6h ? poly : (half)wide;
+  // The range check looks at the float value, not at the narrowed one.
+  if (fabs(wide) >= HALF_MAX) {
+    out = copysign(as_half((short)PINFBITPATT_DP16), out);
+  }
+  return isnan(x) ? as_half((short)QNANBITPATT_DP16) : out;
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, expm1, half);
+
+#endif // cl_khr_fp16
Index: libclc/clspv/lib/math/fp16/fp16.h
===================================================================
--- /dev/null
+++ libclc/clspv/lib/math/fp16/fp16.h
@@ -0,0 +1,36 @@
+#include
+#include
+// NOTE(review): the two include directives above have no target in this copy
+// of the patch; restore them from the original change before applying.
+
+#include "../../../../generic/lib/clcmacro.h"
+#include "../../../../generic/lib/math/math.h"
+
+#ifndef FP16_H_INCLUDED
+#define FP16_H_INCLUDED
+
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// Largest finite half value, as a float constant for range checks.
+#define HALF_MAX 65504.f
+
+// Defines FUNCTION(OP_TY) by calling FUNCTION on the argument cast to float
+// and casting the result to RET_TY, then invokes _CLC_UNARY_VECTORIZE.
+#define _CLC_HALF_IMPL_UNARY(RET_TY, FUNCTION, OP_TY)                          \
+  _CLC_OVERLOAD _CLC_DEF RET_TY FUNCTION(OP_TY x) {                            \
+    return (RET_TY)FUNCTION((float)x);                                         \
+  }                                                                            \
+  _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, RET_TY, FUNCTION, OP_TY)
+
+// Defines FUNCTION(half, half) by calling FUNCTION on both arguments cast to
+// float and casting the result to half, then invokes _CLC_BINARY_VECTORIZE.
+#define _CLC_HALF_IMPL_BINARY(FUNCTION)                                        \
+  _CLC_OVERLOAD _CLC_DEF half FUNCTION(half x, half y) {                       \
+    return (half)FUNCTION((float)x, (float)y);                                 \
+  }                                                                            \
+  _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, FUNCTION, half, half)
+
+#endif // cl_khr_fp16
+
+#endif // FP16_H_INCLUDED
Index: libclc/clspv/lib/math/fp16/hypot.cl
===================================================================
--- /dev/null
+++ libclc/clspv/lib/math/fp16/hypot.cl
@@ -0,0 +1,68 @@
+/*
+ * University of Illinois/NCSA
+ * Open Source License
+ *
+ * Copyright (c) 2014-2016, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * with the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the names of the LLVM Team, University of Illinois at
+ * Urbana-Champaign, nor the names of its contributors may be used to
+ * endorse or promote products derived from this Software without specific
+ * prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
+ * THE SOFTWARE.
+ */
+
+#include "fp16.h"
+
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+// Half-precision hypot: sqrt(x^2 + y^2) evaluated in float and narrowed to
+// half, with overflow, NaN and infinite operands patched explicitly.
+half _CLC_DEF _CLC_OVERLOAD hypot(half x, half y) {
+  float fx = (float)x;
+  float fy = (float)y;
+  // The range check must see the un-narrowed root: once narrowed, an exact
+  // HALF_MAX result such as hypot(65504, 0) cannot be told apart from
+  // overflow, so only magnitudes strictly above HALF_MAX are replaced.
+  float result_f = sqrt(fma(fx, fx, fy * fy));
+  half result = (half)result_f;
+  result = result_f > HALF_MAX ? as_half((short)PINFBITPATT_DP16) : result;
+  result = isnan(x) || isnan(y) ? as_half((short)QNANBITPATT_DP16) : result;
+  // Checked last, so an infinite operand overrides a NaN in the other one.
+  result =
+      (isinf(x) || isinf(y)) ? as_half(((ushort)PINFBITPATT_DP16)) : result;
+  return result;
+}
+
+_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, half, hypot, half, half)
+
+#endif // cl_khr_fp16
Index: libclc/clspv/lib/math/fp16/ldexp.cl
===================================================================
--- /dev/null
+++ libclc/clspv/lib/math/fp16/ldexp.cl
@@ -0,0 +1,50 @@
+#include "fp16.h"
+
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// Half-precision ldexp: x * 2^k evaluated in float, narrowed to half, then
+// patched for infinite results, infinite or zero x, and k == 0.
+_CLC_OVERLOAD _CLC_DEF half ldexp(half x, int k) {
+  float p = pown(2.f, k);
+  float f_result = (float)x * p;
+  half result = 0.f;
+  // If the result would be a subnormal when represented with a half we need to
+  // be careful about rounding
+  if (fabs(f_result) < 0.00006104f) {
+    float subnormal_rounded = f_result;
+    // Scaling by 2^-125 lands in the float subnormal range, where one float
+    // ulp (2^-149) equals one half-subnormal ulp (2^-24) of the unscaled
+    // value; scaling back by 2^125 restores the magnitude. The second factor
+    // has to be the exact inverse of the first: 0x46000000 (2^13) would leave
+    // the value scaled by 2^-112, which narrows to zero.
+    // NOTE(review): assumes float subnormals are not flushed -- confirm.
+    uint sub_rnd = (uint)0x01000000; // 2^-125
+    uint sub_mul = (uint)0x7E000000; // 2^125
+    subnormal_rounded *= as_float(sub_rnd); // round subnormals
+    subnormal_rounded *= as_float(sub_mul); // correct subnormal exp
+    result = (half)subnormal_rounded;
+  } else {
+    result = (half)f_result;
+  }
+
+  result = isinf(f_result) ? (as_short(result) & (short)0x8000
+                                  ? as_half((short)NINFBITPATT_DP16)
+                                  : as_half((short)PINFBITPATT_DP16))
+                           : result;
+  result = isinf(x) ? (as_short(x) & (short)0x8000
+                           ? as_half((short)NINFBITPATT_DP16)
+                           : as_half((short)PINFBITPATT_DP16))
+                    : result;
+
+  result = fabs(f_result) > HALF_MAX && !isnan(x)
+               ? x >= 0.h ? as_half((short)PINFBITPATT_DP16)
+                          : as_half((short)NINFBITPATT_DP16)
+               : result;
+  result = x == 0.h || x == -0.h || k == 0 ? x : result;
+  return result;
+}
+
+_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, half, ldexp, half, int)
+
+#endif // cl_khr_fp16
Index: libclc/clspv/lib/math/fp16/log.cl
===================================================================
--- /dev/null
+++ libclc/clspv/lib/math/fp16/log.cl
@@ -0,0 +1,55 @@
+/*
+ * University of Illinois/NCSA
+ * Open Source License
+ *
+ * Copyright (c) 2014-2016, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * with the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the names of the LLVM Team, University of Illinois at
+ * Urbana-Champaign, nor the names of its contributors may be used to
+ * endorse or promote products derived from this Software without specific
+ * prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+ * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
+ * THE SOFTWARE.
+ */
+
+#include "fp16.h"
+
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+// Half-precision natural log: log2 in float times ln(2) ~= 0x1.62e430p-1f.
+half _CLC_DEF _CLC_OVERLOAD log(half x) {
+  float log2_x = log2((float)x);
+  return (half)(log2_x * 0x1.62e430p-1f);
+}
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, log, half);
+
+#endif // cl_khr_fp16
Index: libclc/clspv/lib/math/fp16/log10.cl
===================================================================
--- /dev/null
+++ libclc/clspv/lib/math/fp16/log10.cl
@@ -0,0 +1,55 @@
+/*
+ * University of Illinois/NCSA
+ * Open Source License
+ *
+ * Copyright (c) 2014-2016, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * with the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in the
+ * documentation and/or other materials provided with the distribution.
+ * + * * Neither the names of the LLVM Team, University of Illinois at + * Urbana-Champaign, nor the names of its contributors may be used to + * endorse or promote products derived from this Software without specific + * prior written permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH + * THE SOFTWARE. + */ + +#include "fp16.h" + +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +half _CLC_DEF _CLC_OVERLOAD log10(half x) { /* base-10 log for half: log2 in float scaled by 0x1.344136p-2f (~0.30103 = log10(2)), narrowed to half */ + return (half)(log2((float)x) * 0x1.344136p-2f); +} + +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, log10, half); + +#endif // cl_khr_fp16 Index: libclc/clspv/lib/math/fp16/pow.cl =================================================================== --- /dev/null +++ libclc/clspv/lib/math/fp16/pow.cl @@ -0,0 +1,24 @@ +#include "fp16.h" + +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEF _CLC_OVERLOAD half pow(half x, half y) { /* half pow: evaluate the float overload, narrow, then patch inf / NaN / overflow results explicitly */ + float float_result = pow((float)x, (float)y); + half ret = (half)float_result; /* the next statement maps an infinite float result to the half infinity of the same sign */ + ret = isinf(float_result) ? float_result >= 0.f + ? as_half((short)PINFBITPATT_DP16) + : as_half((short)NINFBITPATT_DP16) + : ret; + ret = isnan(float_result) ? as_half((short)QNANBITPATT_DP16) : ret; /* NaN float result is replaced by the QNANBITPATT_DP16 pattern */ + ret = !isinf(ret) && fabs(float_result) > HALF_MAX + ? float_result >= 0.f ? 
as_half((ushort)PINFBITPATT_DP16) + : as_half((ushort)NINFBITPATT_DP16) + : ret; /* a float magnitude above HALF_MAX that did not narrow to inf is forced to a signed infinity */ + + return ret; +} + +_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, half, pow, half, half) + +#endif // cl_khr_fp16 Index: libclc/clspv/lib/math/fp16/pown.cl =================================================================== --- /dev/null +++ libclc/clspv/lib/math/fp16/pown.cl @@ -0,0 +1,23 @@ +#include "fp16.h" + +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEF _CLC_OVERLOAD half pown(half x, int n) { /* half pown: float overload followed by the inf / NaN / overflow fix-ups below */ + float float_result = pown((float)x, n); + half ret = (half)float_result; + ret = isinf(float_result) ? float_result >= 0.f + ? as_half((short)PINFBITPATT_DP16) + : as_half((short)NINFBITPATT_DP16) + : ret; + ret = isnan(float_result) ? as_half((short)QNANBITPATT_DP16) : ret; + ret = !isinf(ret) && fabs(float_result) > HALF_MAX + ? float_result >= 0.f ? as_half((ushort)PINFBITPATT_DP16) + : as_half((ushort)NINFBITPATT_DP16) + : ret; + return ret; +} + +_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, half, pown, half, int) + +#endif // cl_khr_fp16 Index: libclc/clspv/lib/math/fp16/powr.cl =================================================================== --- /dev/null +++ libclc/clspv/lib/math/fp16/powr.cl @@ -0,0 +1,31 @@ +#include "fp16.h" + +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_OVERLOAD _CLC_DEF half powr(half x, half k) { /* half powr: float overload, then explicit handling of overflow and of an infinite base x */ + float f_result = powr((float)x, (float)k); + half result = (half)f_result; + result = isinf(f_result) ? (as_short(result) & (short)0x8000 + ? as_half((short)NINFBITPATT_DP16) + : as_half((short)PINFBITPATT_DP16)) + : result; + + result = f_result > HALF_MAX && !isnan(x) ? as_half((short)PINFBITPATT_DP16) + : result; + bool x_inf = isinf(x); + bool x_negative = as_short(x) & 0x8000; + bool k_negative = as_short(k) & 0x8000; /* both *_negative flags test the raw sign bit, so they are also set for -0 */ + bool k_nan = isnan(k); + bool k_zero = k == 0.h || k == -0.h; + result = x_inf ? (k_zero ? as_half((short)QNANBITPATT_DP16) : x) : result; /* infinite base: NaN pattern for a zero exponent, otherwise x itself (refined below) */ + result = x_inf && x_negative ? 
as_half((short)QNANBITPATT_DP16) : result; /* a -inf base always yields the NaN pattern */ + result = x_inf && k_negative && !x_negative && !k_zero ? as_half((short)0) + : result; /* +inf base with a negative, non-zero exponent yields +0 */ + result = x_inf && k_nan ? k : result; /* infinite base with a NaN exponent returns the exponent */ + return result; +} + +_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, half, powr, half, half) + +#endif // cl_khr_fp16 Index: libclc/clspv/lib/math/fp16/rootn.cl =================================================================== --- /dev/null +++ libclc/clspv/lib/math/fp16/rootn.cl @@ -0,0 +1,23 @@ +#include "fp16.h" + +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEF _CLC_OVERLOAD half rootn(half x, int n) { /* half rootn: float overload followed by the inf / NaN / overflow fix-ups below */ + float float_result = rootn((float)x, n); + half ret = (half)float_result; + ret = isinf(float_result) ? float_result >= 0.f + ? as_half((short)PINFBITPATT_DP16) + : as_half((short)NINFBITPATT_DP16) + : ret; + ret = isnan(float_result) ? as_half((short)QNANBITPATT_DP16) : ret; + ret = !isinf(ret) && fabs(float_result) > HALF_MAX + ? float_result >= 0.f ? as_half((ushort)PINFBITPATT_DP16) + : as_half((ushort)NINFBITPATT_DP16) + : ret; + return ret; +} + +_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, half, rootn, half, int) + +#endif // cl_khr_fp16 Index: libclc/clspv/lib/math/fp16/signbit.cl =================================================================== --- /dev/null +++ libclc/clspv/lib/math/fp16/signbit.cl @@ -0,0 +1,99 @@ +/* + * University of Illinois/NCSA + * Open Source License + * + * Copyright (c) 2014-2016, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * with the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the names of the LLVM Team, University of Illinois at + * Urbana-Champaign, nor the names of its contributors may be used to + * endorse or promote products derived from this Software without specific + * prior written permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH + * THE SOFTWARE. + */ + +#include "fp16.h" + +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +int _CLC_DEF _CLC_OVERLOAD signbit(half x) { return as_short(x) < 0; } /* scalar form: the comparison result, non-zero when the sign bit of x is set and 0 otherwise */ + +short2 _CLC_DEF _CLC_OVERLOAD signbit(half2 x) { /* vector forms are written out lane by lane: a lane is (short)-1 when its sign bit is set, (short)0 otherwise */ + return (short2)(as_short(x.lo) < 0 ? (short)-1 : (short)0, + as_short(x.hi) < 0 ? 
(short)-1 : (short)0); +} + +short3 _CLC_DEF _CLC_OVERLOAD signbit(half3 x) { /* 3 lanes: x, y, z */ + return (short3)(as_short(x.x) < 0 ? (short)-1 : (short)0, + as_short(x.y) < 0 ? (short)-1 : (short)0, + as_short(x.z) < 0 ? (short)-1 : (short)0); +} + +short4 _CLC_DEF _CLC_OVERLOAD signbit(half4 x) { /* 4 lanes: x, y, z, w */ + return (short4)(as_short(x.x) < 0 ? (short)-1 : (short)0, + as_short(x.y) < 0 ? (short)-1 : (short)0, + as_short(x.z) < 0 ? (short)-1 : (short)0, + as_short(x.w) < 0 ? (short)-1 : (short)0); +} + +short8 _CLC_DEF _CLC_OVERLOAD signbit(half8 x) { /* 8 lanes: s0..s7 */ + return (short8)(as_short(x.s0) < 0 ? (short)-1 : (short)0, + as_short(x.s1) < 0 ? (short)-1 : (short)0, + as_short(x.s2) < 0 ? (short)-1 : (short)0, + as_short(x.s3) < 0 ? (short)-1 : (short)0, + as_short(x.s4) < 0 ? (short)-1 : (short)0, + as_short(x.s5) < 0 ? (short)-1 : (short)0, + as_short(x.s6) < 0 ? (short)-1 : (short)0, + as_short(x.s7) < 0 ? (short)-1 : (short)0); +} + +short16 _CLC_DEF _CLC_OVERLOAD signbit(half16 x) { /* 16 lanes: s0..s9, sa..sf */ + return (short16)(as_short(x.s0) < 0 ? (short)-1 : (short)0, + as_short(x.s1) < 0 ? (short)-1 : (short)0, + as_short(x.s2) < 0 ? (short)-1 : (short)0, + as_short(x.s3) < 0 ? (short)-1 : (short)0, + as_short(x.s4) < 0 ? (short)-1 : (short)0, + as_short(x.s5) < 0 ? (short)-1 : (short)0, + as_short(x.s6) < 0 ? (short)-1 : (short)0, + as_short(x.s7) < 0 ? (short)-1 : (short)0, + as_short(x.s8) < 0 ? (short)-1 : (short)0, + as_short(x.s9) < 0 ? (short)-1 : (short)0, + as_short(x.sa) < 0 ? (short)-1 : (short)0, + as_short(x.sb) < 0 ? (short)-1 : (short)0, + as_short(x.sc) < 0 ? (short)-1 : (short)0, + as_short(x.sd) < 0 ? (short)-1 : (short)0, + as_short(x.se) < 0 ? (short)-1 : (short)0, + as_short(x.sf) < 0 ? 
(short)-1 : (short)0); +} /* last of the signbit overloads in this file */ + +#endif // cl_khr_fp16 Index: libclc/clspv/lib/math/fp16/sin.cl =================================================================== --- /dev/null +++ libclc/clspv/lib/math/fp16/sin.cl @@ -0,0 +1,66 @@ +/* + * University of Illinois/NCSA + * Open Source License + * + * Copyright (c) 2014-2016, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * with the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the names of the LLVM Team, University of Illinois at + * Urbana-Champaign, nor the names of its contributors may be used to + * endorse or promote products derived from this Software without specific + * prior written permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH + * THE SOFTWARE. + */ + +#include "fp16.h" +#include "trig.h" + +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEF _CLC_OVERLOAD half sin(half x) { /* half sin: reduce |x| with trigred, evaluate sincosred on the reduced value, then fix up the sign on the raw bits */ + half ax = fabs(x); + struct redret r = trigred(ax); + struct scret sc = sincosred(r.hi); + + short s = as_short((r.i & (short)1) == (short)0 ? sc.s : sc.c); /* even r.i selects sc.s, odd r.i selects sc.c */ + s ^= (r.i > (short)1 ? (short)0x8000 : (short)0) ^ + (as_short(x) & (short)0x8000); /* flip the sign bit when r.i > 1, and once more when x has its sign bit set */ + + s = isinf(ax) ? (short)QNANBITPATT_DP16 : s; /* infinite input yields the QNANBITPATT_DP16 pattern */ + + return as_half(s); +} + +_CLC_UNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, half, sin, half) + +#endif // cl_khr_fp16 Index: libclc/clspv/lib/math/fp16/sinh.cl =================================================================== --- /dev/null +++ libclc/clspv/lib/math/fp16/sinh.cl @@ -0,0 +1,68 @@ +/* + * University of Illinois/NCSA + * Open Source License + * + * Copyright (c) 2014-2016, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * with the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers.
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the names of the LLVM Team, University of Illinois at + * Urbana-Champaign, nor the names of its contributors may be used to + * endorse or promote products derived from this Software without specific + * prior written permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH + * THE SOFTWARE. + */ + +#include "fp16.h" +#include "trig.h" + +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +half _CLC_DEF _CLC_OVERLOAD sinh(half hx) { /* half sinh: 0.5 * (2^x - 2^-x) in float with x = hx * 0x1.715476p+0f (~1.442695 = log2(e)), then overflow / NaN / zero fix-ups */ + float x = (float)hx * 0x1.715476p+0f; + float float_result = (0.5f * (exp2(x) - exp2(-x))); + half result = (half)float_result; + result = isinf(result) ? as_half((short)PINFBITPATT_DP16) : result; /* sign of an infinite result is settled by the next statement */ + + result = fabs(float_result) > HALF_MAX + ? ((as_short(hx) & 0x8000) ? as_half((short)NINFBITPATT_DP16) + : as_half((short)PINFBITPATT_DP16)) + : result; /* overflow takes the sign of the input */ + result = isnan(hx) ? as_half((short)QNANBITPATT_DP16) : result; + result = hx == 0.h ? hx : result; /* return the zero input itself so that sinh(-0) stays -0 (a literal 0.h would drop the sign) */ + result = float_result > HALF_MAX ? 
as_half((short)PINFBITPATT_DP16) : result; + return result; +} + +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, sinh, half); + +#endif // cl_khr_fp16 Index: libclc/clspv/lib/math/fp16/sinpi.cl =================================================================== --- /dev/null +++ libclc/clspv/lib/math/fp16/sinpi.cl @@ -0,0 +1,65 @@ +/* + * University of Illinois/NCSA + * Open Source License + * + * Copyright (c) 2014-2016, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * with the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the names of the LLVM Team, University of Illinois at + * Urbana-Champaign, nor the names of its contributors may be used to + * endorse or promote products derived from this Software without specific + * prior written permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH + * THE SOFTWARE. + */ + +#include "fp16.h" +#include "trig.h" + +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEF _CLC_OVERLOAD half sinpi(half x) { /* half sinpi: reduce |x| with trigpired, evaluate sincospired on the reduced value, then fix up the sign on the raw bits */ + struct redret r = trigpired(fabs(x)); + struct scret sc = sincospired(r.hi); + + short s = as_short((r.i & (short)1) == (short)0 ? sc.s : sc.c); /* even r.i selects sc.s, odd r.i selects sc.c */ + s ^= (r.i > (short)1 ? (short)0x8000 : (short)0) ^ + (as_short(x) & (short)0x8000); /* flip the sign bit when r.i > 1, and once more when x has its sign bit set */ + + s = isnan(x) || isinf(x) ? (short)QNANBITPATT_DP16 : s; /* NaN or infinite input yields the QNANBITPATT_DP16 pattern */ + + return as_half(s); +} + +_CLC_UNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, half, sinpi, half) + +#endif // cl_khr_fp16 Index: libclc/clspv/lib/math/fp16/tan.cl =================================================================== --- /dev/null +++ libclc/clspv/lib/math/fp16/tan.cl @@ -0,0 +1,62 @@ +/* + * University of Illinois/NCSA + * Open Source License + * + * Copyright (c) 2014-2016, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * with the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers.
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the names of the LLVM Team, University of Illinois at + * Urbana-Champaign, nor the names of its contributors may be used to + * endorse or promote products derived from this Software without specific + * prior written permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH + * THE SOFTWARE. + */ + +#include "fp16.h" +#include "trig.h" + +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEF _CLC_OVERLOAD half tan(half x) { /* half tan: reduce |x| with trigred, evaluate tanred on the reduced value, then restore the sign of x */ + half ax = fabs(x); + struct redret r = trigred(ax); + short t = as_short(tanred(r.hi, r.i & (short)1)); /* the low bit of r.i is passed as tanred's selector argument */ + t ^= as_short(x) & (short)0x8000; /* flip the sign bit of the result when x has its sign bit set */ + + t = isinf(ax) ? (short)QNANBITPATT_DP16 : t; /* infinite input yields the QNANBITPATT_DP16 pattern */ + return as_half(t); +} + +_CLC_UNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, half, tan, half) + +#endif // cl_khr_fp16 Index: libclc/clspv/lib/math/fp16/tanpi.cl =================================================================== --- /dev/null +++ libclc/clspv/lib/math/fp16/tanpi.cl @@ -0,0 +1,65 @@ +/* + * University of Illinois/NCSA + * Open Source License + * + * Copyright (c) 2014-2016, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * with the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the names of the LLVM Team, University of Illinois at + * Urbana-Champaign, nor the names of its contributors may be used to + * endorse or promote products derived from this Software without specific + * prior written permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH + * THE SOFTWARE. + */ + +#include "fp16.h" +#include "trig.h" + +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEF _CLC_OVERLOAD half tanpi(half x) { /* half tanpi: reduce |x| with trigpired, evaluate tanpired on the reduced value, then adjust the sign on the raw bits */ + struct redret r = trigpired(fabs(x)); + short t = as_short(tanpired(r.hi, r.i & (short)1)); /* the low bit of r.i is passed as tanpired's selector argument */ + t ^= (((r.i == (short)1) | (r.i == (short)2)) & (r.hi == 0.0h)) + ? 
(short)0x8000 + : (short)0; /* extra sign flip only when the reduced value is exactly zero and r.i is 1 or 2 */ + t ^= as_short(x) & (short)0x8000; /* flip the sign bit of the result when x has its sign bit set */ + + t = isnan(x) || isinf(x) ? (short)QNANBITPATT_DP16 : t; /* NaN or infinite input yields the QNANBITPATT_DP16 pattern */ + + return as_half(t); +} + +_CLC_UNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, half, tanpi, half) + +#endif // cl_khr_fp16 Index: libclc/clspv/lib/math/fp16/trig.h =================================================================== --- /dev/null +++ libclc/clspv/lib/math/fp16/trig.h @@ -0,0 +1,69 @@ +/* + * University of Illinois/NCSA + * Open Source License + * + * Copyright (c) 2014-2016, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * with the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the names of the LLVM Team, University of Illinois at + * Urbana-Champaign, nor the names of its contributors may be used to + * endorse or promote products derived from this Software without specific + * prior written permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH + * THE SOFTWARE. + */ + +#ifndef FP16_TRIG_H_INCLUDED +#define FP16_TRIG_H_INCLUDED + +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +struct redret { /* result of an argument reduction (trigred / trigpired): hi is the reduced half value, i a small integer whose low bits callers test */ + half hi; + short i; +}; + +struct scret { /* value pair returned by sincosred / sincospired: s is the odd (sine-like) polynomial result, c the even (cosine-like) one */ + half s; + half c; +}; + +extern struct scret sincosred(half x); /* the six helpers declared here are defined in trig.cl */ +extern struct redret trigred(half hx); +extern struct redret trigpired(half x); +extern struct scret sincospired(half x); +extern half tanpired(half x, short i); +extern half tanred(half x, short i); + +#endif // cl_khr_fp16 + +#endif // FP16_TRIG_H_INCLUDED Index: libclc/clspv/lib/math/fp16/trig.cl =================================================================== --- /dev/null +++ libclc/clspv/lib/math/fp16/trig.cl @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "fp16.h" +#include "trig.h" + +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +struct scret sincosred(half x) { /* s: odd polynomial x + x*t*(...) and c: even polynomial 1 + t*(...) with t = x*x, both evaluated in half */ + half t = x * x; + half s = mad(x, t * mad(t, 0x1.0bp-7h, -0x1.554p-3h), x); + half c = mad(t, mad(t, 0x1.4b4p-5h, -0x1.ffcp-2h), 1.0h); + + struct scret ret; + ret.c = c; + ret.s = s; + return ret; +} + +struct redret trigred(half hx) { /* float reduction: fn = rint(hx * twobypi); hi = hx - fn * (pb2_a + pb2_b + pb2_c) applied term by term; i = fn & 3 */ + const float twobypi = 0x1.45f306p-1f; + const float pb2_a = 0x1.92p+0f; + const float pb2_b = 0x1.fap-12f; + const float pb2_c = 0x1.54442ep-20f; + + float x = (float)hx; + float fn = rint(x * twobypi); + + float ret_hi = mad(fn, -pb2_c, mad(fn, -pb2_b, mad(fn, -pb2_a, x))); + + struct redret ret; + ret.hi = ret_hi >= HALF_MAX ? as_half((short)PINFBITPATT_DP16) : (half)ret_hi; + ret.i = (int)fn & 0x3; + return ret; +} + +struct redret trigpired(half x) { /* reduction for the *pi functions: x > 1 is first replaced by twice the fractional part of x / 2; then hi = x - 0.5 * rint(2x), i = rint(2x) & 3 */ + half fraction = fmin(0.5h * x - floor(0.5h * x), as_half((short)0x3BFF)); + half t = 2.0h * fraction; + x = x > 1.0h ? t : x; + t = rint(2.0h * x); + + struct redret ret; + ret.hi = mad(t, -0.5h, x); + ret.i = (short)t & (short)0x3; + return ret; +} + +struct scret sincospired(half x) { /* s: odd polynomial with leading term x * 0x1.92p+1h (~3.14); c: even polynomial 1 + t*(...) with t = x*x */ + half t = x * x; + + half sx = mad(t, 0x1.b84p+0h, -0x1.46cp+2h); + sx = x * t * sx; + sx = mad(x, 0x1.92p+1h, sx); + + half cx = mad(t, 0x1.fbp+1h, -0x1.3bcp+2h); + cx = mad(t, cx, 1.0h); + + struct scret ret; + ret.c = cx; + ret.s = sx; + return ret; +} + +half tanpired(half x, short i) { /* odd polynomial t with leading term x * 0x1.92p+1h; returns -native_recip(t) when i is non-zero, t otherwise */ + half s = x * x; + + half t = mad(s, mad(s, 0x1.3d8p+8h, 0x1.fe4p+4h), 0x1.508p+3h); + + t = x * s * t; + t = mad(x, 0x1.92p+1h, t); + + half tr = -native_recip(t); + + return i ? 
tr : t; +} + +half tanred(half x, short i) { /* odd polynomial t = x + x*s*(...) with s = x*x; returns -(1 / t) when i is non-zero, t otherwise */ + half s = x * x; + + half t = mad(s, mad(s, 0x1.794p-4h, 0x1.e3cp-4h), 0x1.57p-2h); + t = mad(x, s * t, x); + + half tr = -(1 / t); + + return i ? tr : t; +} + +#endif // cl_khr_fp16 Index: libclc/clspv/lib/math/fp16/upcast.cl =================================================================== --- /dev/null +++ libclc/clspv/lib/math/fp16/upcast.cl @@ -0,0 +1,20 @@ +#include "fp16.h" + +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_HALF_IMPL_UNARY(half, acosh, half) +_CLC_HALF_IMPL_UNARY(half, atan, half) +_CLC_HALF_IMPL_UNARY(half, atanpi, half) +_CLC_HALF_IMPL_UNARY(half, lgamma, half) +_CLC_HALF_IMPL_UNARY(half, log1p, half) +_CLC_HALF_IMPL_UNARY(half, logb, half) +_CLC_HALF_IMPL_UNARY(half, tanh, half) +_CLC_HALF_IMPL_UNARY(int, ilogb, half) + +_CLC_HALF_IMPL_BINARY(atan2) +_CLC_HALF_IMPL_BINARY(atan2pi) +_CLC_HALF_IMPL_BINARY(fmod) +_CLC_HALF_IMPL_BINARY(remainder) + +#endif // cl_khr_fp16 Index: libclc/generic/lib/math/ldexp.cl =================================================================== --- libclc/generic/lib/math/ldexp.cl +++ libclc/generic/lib/math/ldexp.cl @@ -35,12 +35,7 @@ _CLC_DEFINE_BINARY_BUILTIN(double, ldexp, __clc_ldexp, double, int) #endif -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_DEFINE_BINARY_BUILTIN(half, ldexp, __clc_ldexp, half, int) -#endif +// TODO: add declaration for half when it's been implemented. // This defines all the ldexp(GENTYPE, int) variants #define __CLC_BODY Index: libclc/generic/lib/math/ldexp.inc =================================================================== --- libclc/generic/lib/math/ldexp.inc +++ libclc/generic/lib/math/ldexp.inc @@ -20,9 +20,6 @@ * THE SOFTWARE. */ -// TODO: Enable half precision when ldexp is implemented. 
-#if __CLC_FPSIZE > 16 - #ifndef __CLC_SCALAR _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE ldexp(__CLC_GENTYPE x, int n) { @@ -30,5 +27,3 @@ } #endif - -#endif Index: libclc/generic/lib/math/math.h =================================================================== --- libclc/generic/lib/math/math.h +++ libclc/generic/lib/math/math.h @@ -113,5 +113,31 @@ #endif // cl_khr_fp64 +#ifdef cl_khr_fp16 /* bit-layout constants for the 16-bit half type: sign 0x8000, exponent field 0x7c00 (shift 10, bias 15), mantissa field 0x03ff */ + +#define SIGNBIT_DP16 0x8000 +#define EXSIGNBIT_DP16 0x7fff +#define EXPBITS_DP16 0x7c00 +#define MANTBITS_DP16 0x03ff +#define ONEEXPBITS_DP16 0x3c00 +#define TWOEXPBITS_DP16 0x4000 +#define HALFEXPBITS_DP16 0x3800 +#define IMPBIT_DP16 0x0400 +#define QNANBITPATT_DP16 0x7e80 /* NOTE(review): a NaN with mantissa bits 0x0280 set; INDEFBITPATT_DP16 below uses 0x..00 payload - confirm 0x7e80 (rather than 0x7e00) is intended */ +#define INDEFBITPATT_DP16 0xfe00 +#define PINFBITPATT_DP16 0x7c00 +#define NINFBITPATT_DP16 0xfc00 +#define EXPBIAS_DP16 15 +#define EXPSHIFTBITS_DP16 10 +#define BIASEDEMIN_DP16 1 +#define EMIN_DP16 -14 +#define BIASEDEMAX_DP16 30 /* 0x1e: EMAX_DP16 + EXPBIAS_DP16, the largest biased exponent of a finite half */ +#define EMAX_DP16 15 /* 0xf */ +#define LAMBDA_DP16 1.0e4 +#define MANTLENGTH_DP16 11 +#define BASEDIGITS_DP16 3 + +#endif // cl_khr_fp16 + #define ALIGNED(x) __attribute__((aligned(x))) #endif // __CLC_MATH_H_