diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -475,6 +475,7 @@ expf.cpp HDRS ../expf.h + expxf.h DEPENDS .common_constants libc.src.__support.FPUtil.fputil @@ -491,6 +492,7 @@ HDRS ../exp2f.h DEPENDS + .common_constants libc.src.__support.FPUtil.fputil libc.src.__support.FPUtil.polyeval libc.include.math @@ -504,6 +506,7 @@ expm1f.cpp HDRS ../expm1f.h + expxf.h DEPENDS .common_constants libc.src.__support.FPUtil.fputil diff --git a/libc/src/math/generic/common_constants.h b/libc/src/math/generic/common_constants.h --- a/libc/src/math/generic/common_constants.h +++ b/libc/src/math/generic/common_constants.h @@ -17,19 +17,11 @@ // Lookup table for log(f) = log(1 + n*2^(-7)) where n = 0..127. extern const double LOG_F[128]; -// Lookup table for exp(m) with m = -104, ..., 89. -// -104 = floor(log(single precision's min denormal)) -// 89 = ceil(log(single precision's max normal)) -// Table is generated with Sollya as follow: -// > display = hexadecimal; -// > for i from -104 to 89 do { D(exp(i)); }; -extern const double EXP_M1[195]; - -// Lookup table for exp(m * 2^(-7)) with m = 0, ..., 127. -// Table is generated with Sollya as follow: -// > display = hexadecimal; -// > for i from 0 to 127 do { D(exp(i / 128)); }; -extern const double EXP_M2[128]; +static constexpr int EXP_bits_p = 5; +static constexpr int EXP_num_p = 1 << EXP_bits_p; + +// > for i from 0 to 31 do { D(2^(i / 32)) - 1; }; +extern const double EXP_M[EXP_num_p]; } // namespace __llvm_libc diff --git a/libc/src/math/generic/common_constants.cpp b/libc/src/math/generic/common_constants.cpp --- a/libc/src/math/generic/common_constants.cpp +++ b/libc/src/math/generic/common_constants.cpp @@ -102,128 +102,37 @@ 0x1.58cadb5cd7989p-1, 0x1.5ad404c359f2cp-1, 0x1.5cdb1dc6c1764p-1, 0x1.5ee02a9241675p-1, 0x1.60e32f44788d8p-1}; -// Lookup table for exp(m) with m = -104, ..., 89. -// -104 = floor(log(single precision's min denormal)) -// 89 = ceil(log(single precision's max normal)) -// Table is generated with Sollya as follow: -// > display = hexadecimal; -// > for i from -104 to 89 do { D(exp(i)); }; -const double EXP_M1[195] = { - 0x1.f1e6b68529e33p-151, 0x1.525be4e4e601dp-149, 0x1.cbe0a45f75eb1p-148, - 0x1.3884e838aea68p-146, 0x1.a8c1f14e2af5dp-145, 0x1.20a717e64a9bdp-143, - 0x1.8851d84118908p-142, 0x1.0a9bdfb02d240p-140, 0x1.6a5bea046b42ep-139, - 0x1.ec7f3b269efa8p-138, 0x1.4eafb87eab0f2p-136, 0x1.c6e2d05bbc000p-135, - 0x1.35208867c2683p-133, 0x1.a425b317eeacdp-132, 0x1.1d8508fa8246ap-130, - 0x1.840fbc08fdc8ap-129, 0x1.07b7112bc1ffep-127, 0x1.666d0dad2961dp-126, - 0x1.e726c3f64d0fep-125, 0x1.4b0dc07cabf98p-123, 0x1.c1f2daf3b6a46p-122, - 0x1.31c5957a47de2p-120, 0x1.9f96445648b9fp-119, 0x1.1a6baeadb4fd1p-117, - 0x1.7fd974d372e45p-116, 0x1.04da4d1452919p-114, 0x1.62891f06b3450p-113, - 0x1.e1dd273aa8a4ap-112, 0x1.4775e0840bfddp-110, 0x1.bd109d9d94bdap-109, - 0x1.2e73f53fba844p-107, 0x1.9b138170d6bfep-106, 0x1.175af0cf60ec5p-104, - 0x1.7baee1bffa80bp-103, 0x1.02057d1245cebp-101, 0x1.5eafffb34ba31p-100, - 0x1.dca23bae16424p-99, 0x1.43e7fc88b8056p-97, 0x1.b83bf23a9a9ebp-96, - 0x1.2b2b8dd05b318p-94, 0x1.969d47321e4ccp-93, 0x1.1452b7723aed2p-91, - 0x1.778fe2497184cp-90, 0x1.fe7116182e9ccp-89, 0x1.5ae191a99585ap-87, - 0x1.d775d87da854dp-86, 0x1.4063f8cc8bb98p-84, 0x1.b374b315f87c1p-83, - 0x1.27ec458c65e3cp-81, 0x1.923372c67a074p-80, 0x1.1152eaeb73c08p-78, - 0x1.737c5645114b5p-77, 0x1.f8e6c24b5592ep-76, 0x1.571db733a9d61p-74, - 0x1.d257d547e083fp-73, 0x1.3ce9b9de78f85p-71, 0x1.aebabae3a41b5p-70, - 0x1.24b6031b49bdap-68, 0x1.8dd5e1bb09d7ep-67, 0x1.0e5b73d1ff53dp-65, - 0x1.6f741de1748ecp-64, 0x1.f36bd37f42f3ep-63, 0x1.536452ee2f75cp-61, - 0x1.cd480a1b74820p-60, 0x1.39792499b1a24p-58, 0x1.aa0de4bf35b38p-57, - 0x1.2188ad6ae3303p-55, 0x1.898471fca6055p-54, 0x1.0b6c3afdde064p-52, - 0x1.6b7719a59f0e0p-51, 0x1.ee001eed62aa0p-50, 0x1.4fb547c775da8p-48, - 0x1.c8464f7616468p-47, 0x1.36121e24d3bbap-45, 0x1.a56e0c2ac7f75p-44, - 0x1.1e642baeb84a0p-42, 0x1.853f01d6d53bap-41, 0x1.0885298767e9ap-39, - 0x1.67852a7007e42p-38, 0x1.e8a37a45fc32ep-37, 0x1.4c1078fe9228ap-35, - 0x1.c3527e433fab1p-34, 0x1.32b48bf117da2p-32, 0x1.a0db0d0ddb3ecp-31, - 0x1.1b48655f37267p-29, 0x1.81056ff2c5772p-28, 0x1.05a628c699fa1p-26, - 0x1.639e3175a689dp-25, 0x1.e355bbaee85cbp-24, 0x1.4875ca227ec38p-22, - 0x1.be6c6fdb01612p-21, 0x1.2f6053b981d98p-19, 0x1.9c54c3b43bc8bp-18, - 0x1.18354238f6764p-16, 0x1.7cd79b5647c9bp-15, 0x1.02cf22526545ap-13, - 0x1.5fc21041027adp-12, 0x1.de16b9c24a98fp-11, 0x1.44e51f113d4d6p-9, - 0x1.b993fe00d5376p-8, 0x1.2c155b8213cf4p-6, 0x1.97db0ccceb0afp-5, - 0x1.152aaa3bf81ccp-3, 0x1.78b56362cef38p-2, 0x1.0000000000000p+0, - 0x1.5bf0a8b145769p+1, 0x1.d8e64b8d4ddaep+2, 0x1.415e5bf6fb106p+4, - 0x1.b4c902e273a58p+5, 0x1.28d389970338fp+7, 0x1.936dc5690c08fp+8, - 0x1.122885aaeddaap+10, 0x1.749ea7d470c6ep+11, 0x1.fa7157c470f82p+12, - 0x1.5829dcf950560p+14, 0x1.d3c4488ee4f7fp+15, 0x1.3de1654d37c9ap+17, - 0x1.b00b5916ac955p+18, 0x1.259ac48bf05d7p+20, 0x1.8f0ccafad2a87p+21, - 0x1.0f2ebd0a80020p+23, 0x1.709348c0ea4f9p+24, 0x1.f4f22091940bdp+25, - 0x1.546d8f9ed26e1p+27, 0x1.ceb088b68e804p+28, 0x1.3a6e1fd9eecfdp+30, - 0x1.ab5adb9c43600p+31, 0x1.226af33b1fdc1p+33, 0x1.8ab7fb5475fb7p+34, - 0x1.0c3d3920962c9p+36, 0x1.6c932696a6b5dp+37, 0x1.ef822f7f6731dp+38, - 0x1.50bba3796379ap+40, 0x1.c9aae4631c056p+41, 0x1.370470aec28edp+43, - 0x1.a6b765d8cdf6dp+44, 0x1.1f43fcc4b662cp+46, 0x1.866f34a725782p+47, - 0x1.0953e2f3a1ef7p+49, 0x1.689e221bc8d5bp+50, 0x1.ea215a1d20d76p+51, - 0x1.4d13fbb1a001ap+53, 0x1.c4b334617cc67p+54, 0x1.33a43d282a519p+56, - 0x1.a220d397972ebp+57, 0x1.1c25c88df6862p+59, 0x1.8232558201159p+60, - 0x1.0672a3c9eb871p+62, 0x1.64b41c6d37832p+63, 0x1.e4cf766fe49bep+64, - 0x1.49767bc0483e3p+66, 0x1.bfc951eb8bb76p+67, 0x1.304d6aeca254bp+69, - 0x1.9d97010884251p+70, 0x1.19103e4080b45p+72, 0x1.7e013cd114461p+73, - 0x1.03996528e074cp+75, 0x1.60d4f6fdac731p+76, 0x1.df8c5af17ba3bp+77, - 0x1.45e3076d61699p+79, 0x1.baed16a6e0da7p+80, 0x1.2cffdfebde1a1p+82, - 0x1.9919cabefcb69p+83, 0x1.160345c9953e3p+85, 0x1.79dbc9dc53c66p+86, - 0x1.00c810d464097p+88, 0x1.5d009394c5c27p+89, 0x1.da57de8f107a8p+90, - 0x1.425982cf597cdp+92, 0x1.b61e5ca3a5e31p+93, 0x1.29bb825dfcf87p+95, - 0x1.94a90db0d6fe2p+96, 0x1.12fec759586fdp+98, 0x1.75c1dc469e3afp+99, - 0x1.fbfd219c43b04p+100, 0x1.5936d44e1a146p+102, 0x1.d531d8a7ee79cp+103, - 0x1.3ed9d24a2d51bp+105, 0x1.b15cfe5b6e17bp+106, 0x1.268038c2c0e00p+108, - 0x1.9044a73545d48p+109, 0x1.1002ab6218b38p+111, 0x1.71b3540cbf921p+112, - 0x1.f6799ea9c414ap+113, 0x1.55779b984f3ebp+115, 0x1.d01a210c44aa4p+116, - 0x1.3b63da8e91210p+118, 0x1.aca8d6b0116b8p+119, 0x1.234de9e0c74e9p+121, - 0x1.8bec7503ca477p+122, 0x1.0d0eda9796b90p+124, 0x1.6db0118477245p+125, - 0x1.f1056dc7bf22dp+126, 0x1.51c2cc3433801p+128, 0x1.cb108ffbec164p+129, -}; - -// Lookup table for exp(m * 2^(-7)) with m = 0, ..., 127. -// Table is generated with Sollya as follow: -// > display = hexadecimal; -// > for i from 0 to 127 do { D(exp(i / 128)); }; -const double EXP_M2[128] = { - 0x1.0000000000000p0, 0x1.0202015600446p0, 0x1.04080ab55de39p0, - 0x1.06122436410ddp0, 0x1.08205601127edp0, 0x1.0a32a84e9c1f6p0, - 0x1.0c49236829e8cp0, 0x1.0e63cfa7ab09dp0, 0x1.1082b577d34edp0, - 0x1.12a5dd543ccc5p0, 0x1.14cd4fc989cd6p0, 0x1.16f9157587069p0, - 0x1.192937074e0cdp0, 0x1.1b5dbd3f68122p0, 0x1.1d96b0eff0e79p0, - 0x1.1fd41afcba45ep0, 0x1.2216045b6f5cdp0, 0x1.245c7613b8a9bp0, - 0x1.26a7793f60164p0, 0x1.28f7170a755fdp0, 0x1.2b4b58b372c79p0, - 0x1.2da4478b620c7p0, 0x1.3001ecf601af7p0, 0x1.32645269ea829p0, - 0x1.34cb8170b5835p0, 0x1.373783a722012p0, 0x1.39a862bd3c106p0, - 0x1.3c1e2876834aap0, 0x1.3e98deaa11dccp0, 0x1.41188f42c3e32p0, - 0x1.439d443f5f159p0, 0x1.462707b2bac21p0, 0x1.48b5e3c3e8186p0, - 0x1.4b49e2ae5ac67p0, 0x1.4de30ec211e60p0, 0x1.50817263c13cdp0, - 0x1.5325180cfacf7p0, 0x1.55ce0a4c58c7cp0, 0x1.587c53c5a7af0p0, - 0x1.5b2fff3210fd9p0, 0x1.5de9176045ff5p0, 0x1.60a7a734ab0e8p0, - 0x1.636bb9a983258p0, 0x1.663559cf1bc7cp0, 0x1.690492cbf9433p0, - 0x1.6bd96fdd034a2p0, 0x1.6eb3fc55b1e76p0, 0x1.719443a03acb9p0, - 0x1.747a513dbef6ap0, 0x1.776630c678bc1p0, 0x1.7a57ede9ea23ep0, - 0x1.7d4f946f0ba8dp0, 0x1.804d30347b546p0, 0x1.8350cd30ac390p0, - 0x1.865a7772164c5p0, 0x1.896a3b1f66a0ep0, 0x1.8c802477b0010p0, - 0x1.8f9c3fd29beafp0, 0x1.92be99a09bf00p0, 0x1.95e73e6b1b75ep0, - 0x1.99163ad4b1dccp0, 0x1.9c4b9b995509bp0, 0x1.9f876d8e8c566p0, - 0x1.a2c9bda3a3e78p0, 0x1.a61298e1e069cp0, 0x1.a9620c6cb3374p0, - 0x1.acb82581eee54p0, 0x1.b014f179fc3b8p0, 0x1.b3787dc80f95fp0, - 0x1.b6e2d7fa5eb18p0, 0x1.ba540dba56e56p0, 0x1.bdcc2cccd3c85p0, - 0x1.c14b431256446p0, 0x1.c4d15e873c193p0, 0x1.c85e8d43f7cd0p0, - 0x1.cbf2dd7d490f2p0, 0x1.cf8e5d84758a9p0, 0x1.d3311bc7822b4p0, - 0x1.d6db26d16cd67p0, 0x1.da8c8d4a66969p0, 0x1.de455df80e3c0p0, - 0x1.e205a7bdab73ep0, 0x1.e5cd799c6a54ep0, 0x1.e99ce2b397649p0, - 0x1.ed73f240dc142p0, 0x1.f152b7a07bb76p0, 0x1.f539424d90f5ep0, - 0x1.f927a1e24bb76p0, 0x1.fd1de6182f8c9p0, 0x1.008e0f64294abp1, - 0x1.02912df5ce72ap1, 0x1.049856cd84339p1, 0x1.06a39207f0a09p1, - 0x1.08b2e7d2035cfp1, 0x1.0ac6606916501p1, 0x1.0cde041b0e9aep1, - 0x1.0ef9db467dcf8p1, 0x1.1119ee5ac36b6p1, 0x1.133e45d82e952p1, - 0x1.1566ea50201d7p1, 0x1.1793e4652cc50p1, 0x1.19c53ccb3fc6bp1, - 0x1.1bfafc47bda73p1, 0x1.1e352bb1a74adp1, 0x1.2073d3f1bd518p1, - 0x1.22b6fe02a3b9cp1, 0x1.24feb2f105cb8p1, 0x1.274afbdbba4a6p1, - 0x1.299be1f3e7f1cp1, 0x1.2bf16e7d2a38cp1, 0x1.2e4baacdb6614p1, - 0x1.30aaa04e80d05p1, 0x1.330e587b62b28p1, 0x1.3576dce33feadp1, - 0x1.37e437282d4eep1, 0x1.3a5670ff972edp1, 0x1.3ccd9432682b4p1, - 0x1.3f49aa9d30590p1, 0x1.41cabe304cb34p1, 0x1.4450d8f00edd4p1, - 0x1.46dc04f4e5338p1, 0x1.496c4c6b832dap1, 0x1.4c01b9950a111p1, - 0x1.4e9c56c731f5dp1, 0x1.513c2e6c731d7p1, 0x1.53e14b042f9cap1, - 0x1.568bb722dd593p1, 0x1.593b7d72305bbp1, -}; - +// Wolfram alpha: N[Table[2^x-1,{x,-16/32,15/32,1/32}],27] +const double EXP_M[EXP_num_p] = {-0.292893218813452475599155638, + -0.277409596511476689981496879, + -0.261586927030250344306546260, + -0.245417786203288630116990224, + -0.228894587296029588193854069, + -0.212009577446056756772364920, + -0.194754834025372845910239666, + -0.177122260923017577740621638, + -0.159103584746285456968874524, + -0.140690350938761042185327812, + -0.121873919813350258443919690, + -0.102645462498446406786148379, + -0.0829959567953287682564584052, + -0.0629161829448500493350005250, + -0.0423967193014263530636943649, + -0.0214279379122998654908388742, + 0.0, + 0.0218971486541166782344801348, + 0.0442737824274138403219664787, + 0.0671404006768236181695211210, + 0.0905077326652576592070106558, + 0.114386742595892536308812957, + 0.138788634756691653703830284, + 0.163724858777577513813573599, + 0.189207115002721066717499971, + 0.215247359980468878116520251, + 0.241857812073484048593677469, + 0.269050957191733222554419081, + 0.296839554651009665933754118, + 0.325236643159741294629537095, + 0.354255546936892728298014740, + 0.383909881963831954872659527}; } // namespace __llvm_libc diff --git a/libc/src/math/generic/exp2f.cpp b/libc/src/math/generic/exp2f.cpp --- a/libc/src/math/generic/exp2f.cpp +++ b/libc/src/math/generic/exp2f.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "src/math/exp2f.h" +#include "common_constants.h" #include "src/__support/FPUtil/BasicOperations.h" #include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FMA.h" @@ -18,34 +19,14 @@ namespace __llvm_libc { -// Lookup table for 2^(m * 2^(-6)) with m = 0, ..., 63. -// Table is generated with Sollya as follow: -// > display = hexadecimal; -// > for i from 0 to 63 do { D(2^(i / 64)); }; -static constexpr double EXP_M[64] = { - 0x1.0000000000000p0, 0x1.02c9a3e778061p0, 0x1.059b0d3158574p0, - 0x1.0874518759bc8p0, 0x1.0b5586cf9890fp0, 0x1.0e3ec32d3d1a2p0, - 0x1.11301d0125b51p0, 0x1.1429aaea92de0p0, 0x1.172b83c7d517bp0, - 0x1.1a35beb6fcb75p0, 0x1.1d4873168b9aap0, 0x1.2063b88628cd6p0, - 0x1.2387a6e756238p0, 0x1.26b4565e27cddp0, 0x1.29e9df51fdee1p0, - 0x1.2d285a6e4030bp0, 0x1.306fe0a31b715p0, 0x1.33c08b26416ffp0, - 0x1.371a7373aa9cbp0, 0x1.3a7db34e59ff7p0, 0x1.3dea64c123422p0, - 0x1.4160a21f72e2ap0, 0x1.44e086061892dp0, 0x1.486a2b5c13cd0p0, - 0x1.4bfdad5362a27p0, 0x1.4f9b2769d2ca7p0, 0x1.5342b569d4f82p0, - 0x1.56f4736b527dap0, 0x1.5ab07dd485429p0, 0x1.5e76f15ad2148p0, - 0x1.6247eb03a5585p0, 0x1.6623882552225p0, 0x1.6a09e667f3bcdp0, - 0x1.6dfb23c651a2fp0, 0x1.71f75e8ec5f74p0, 0x1.75feb564267c9p0, - 0x1.7a11473eb0187p0, 0x1.7e2f336cf4e62p0, 0x1.82589994cce13p0, - 0x1.868d99b4492edp0, 0x1.8ace5422aa0dbp0, 0x1.8f1ae99157736p0, - 0x1.93737b0cdc5e5p0, 0x1.97d829fde4e50p0, 0x1.9c49182a3f090p0, - 0x1.a0c667b5de565p0, 0x1.a5503b23e255dp0, 0x1.a9e6b5579fdbfp0, - 0x1.ae89f995ad3adp0, 0x1.b33a2b84f15fbp0, 0x1.b7f76f2fb5e47p0, - 0x1.bcc1e904bc1d2p0, 0x1.c199bdd85529cp0, 0x1.c67f12e57d14bp0, - 0x1.cb720dcef9069p0, 0x1.d072d4a07897cp0, 0x1.d5818dcfba487p0, - 0x1.da9e603db3285p0, 0x1.dfc97337b9b5fp0, 0x1.e502ee78b3ff6p0, - 0x1.ea4afa2a490dap0, 0x1.efa1bee615a27p0, 0x1.f50765b6e4540p0, - 0x1.fa7c1819e90d8p0, -}; +constexpr double l2h = 0.6931471787393093109130859375; +constexpr double l2l = 1.8206359985041461839581765680755E-9; +constexpr double mlp = double(EXP_num_p); +constexpr double mld = (1.0 / mlp); + +constexpr uint32_t exval1 = 0x3b42'9d37U; +constexpr uint32_t exval2 = 0xbcf3'a937U; +constexpr uint32_t exval_mask = exval1 & exval2; LLVM_LIBC_FUNCTION(float, exp2f, (float x)) { using FPBits = typename fputil::FPBits; @@ -54,36 +35,6 @@ uint32_t x_u = xbits.uintval(); uint32_t x_abs = x_u & 0x7fff'ffffU; - // Exceptional values. - switch (x_u) { - case 0x3b42'9d37U: // x = 0x1.853a6ep-9f - if (fputil::get_round() == FE_TONEAREST) - return 0x1.00870ap+0f; - break; - case 0x3c02'a9adU: // x = 0x1.05535ap-7f - if (fputil::get_round() == FE_TONEAREST) - return 0x1.016b46p+0f; - break; - case 0x3ca6'6e26U: { // x = 0x1.4cdc4cp-6f - int round_mode = fputil::get_round(); - if (round_mode == FE_TONEAREST || round_mode == FE_UPWARD) - return 0x1.03a16ap+0f; - return 0x1.03a168p+0f; - } - case 0x3d92'a282U: // x = 0x1.254504p-4f - if (fputil::get_round() == FE_UPWARD) - return 0x1.0d0688p+0f; - return 0x1.0d0686p+0f; - case 0xbcf3'a937U: // x = -0x1.e7526ep-6f - if (fputil::get_round() == FE_TONEAREST) - return 0x1.f58d62p-1f; - break; - case 0xb8d3'd026U: // x = -0x1.a7a04cp-14f - if (fputil::get_round() == FE_TONEAREST) - return 0x1.fff6d2p-1f; - break; - } - // // When |x| >= 128, |x| < 2^-25, or x is nan if (unlikely(x_abs >= 0x4300'0000U || x_abs <= 0x3280'0000U)) { // |x| < 2^-25 @@ -101,7 +52,7 @@ errno = ERANGE; } // x is +inf or nan - return x + static_cast(FPBits::inf()); + return x + FPBits::inf().get_val(); } // x < -150 if (x_u >= 0xc316'0000U) { @@ -112,54 +63,42 @@ if (xbits.is_nan()) return x; if (fputil::get_round() == FE_UPWARD) - return static_cast(FPBits(FPBits::MIN_SUBNORMAL)); + return FPBits(FPBits::MIN_SUBNORMAL).get_val(); if (x != 0.0f) errno = ERANGE; return 0.0f; } } - // For -150 <= x < 128, to compute 2^x, we perform the following range - // reduction: find hi, mid, lo such that: - // x = hi + mid + lo, in which - // hi is an integer, - // mid * 2^6 is an integer - // -2^(-7) <= lo < 2^-7. - // In particular, - // hi + mid = round(x * 2^6) * 2^(-6). - // Then, - // 2^(x) = 2^(hi + mid + lo) = 2^hi * 2^mid * 2^lo. - // Multiply by 2^hi is simply adding hi to the exponent field. We store - // exp(mid) in the lookup tables EXP_M. exp(lo) is computed using a degree-4 - // minimax polynomial generated by Sollya. - // x_hi = round(hi + mid). - // The default rounding mode for float-to-int conversion in C++ is - // round-toward-zero. To make it round-to-nearest, we add (-1)^sign(x) * 0.5 - // before conversion. - int x_hi = - static_cast(x * 0x1.0p+6f + (xbits.get_sign() ? -0.5f : 0.5f)); - // For 2-complement integers, arithmetic right shift is the same as dividing - // by a power of 2 and then round down (toward negative infinity). - int e_hi = (x_hi >> 6) + - static_cast(fputil::FloatProperties::EXPONENT_BIAS); - fputil::FPBits exp_hi( - static_cast(e_hi) - << fputil::FloatProperties::MANTISSA_WIDTH); - // mid = x_hi & 0x0000'003fU; - double exp_hi_mid = static_cast(exp_hi) * EXP_M[x_hi & 0x3f]; - // Subtract (hi + mid) from x to get lo. - x -= static_cast(x_hi) * 0x1.0p-6f; - double xd = static_cast(x); - // Degree-4 minimax polynomial generated by Sollya with the following - // commands: - // > display = hexadecimal; - // > Q = fpminimax((2^x - 1)/x, 3, [|D...|], [-2^-7, 2^-7]); - // > Q; - double exp_lo = - fputil::polyeval(xd, 0x1p0, 0x1.62e42fefa2417p-1, 0x1.ebfbdff82f809p-3, - 0x1.c6b0b92131c47p-5, 0x1.3b2ab6fb568a3p-7); - double result = exp_hi_mid * exp_lo; - return static_cast(result); + if (unlikely(x_u & exval_mask) == exval_mask) { + if (unlikely(x_u == exval1)) { // x = 0x1.853a6ep-9f + if (fputil::get_round() == FE_TONEAREST) + return 0x1.00870ap+0f; + } else if (unlikely(x_u == exval2)) { // x = -0x1.e7526ep-6f + if (fputil::get_round() == FE_TONEAREST) + return 0x1.f58d62p-1f; + } + } + + double xdbl = x; + int ps = xdbl * mlp + 16384.5; + double psx = mld * (ps - 16384.0); + ps += 1 << (EXP_bits_p - 1); + int dg = (ps >> EXP_bits_p) - (16384 >> EXP_bits_p); + double ml = EXP_M[ps & (EXP_num_p - 1)]; + double dx = xdbl - psx; + // N[Table[Ln[2]^n/n!,{n,1,6}],30] + double pe = dx * fputil::polyeval(dx, l2l, 0.240226506959100712333551263163, + 0.0555041086648215799531422637686, + 0.00961812910762847716197907157366, + 0.00133335581464284434234122219880, + 0.000154035303933816099544370973327) + + l2h * dx; + + double result = ml * pe + pe + ml + 1.0; + fputil::FPBits bs(result); + bs.set_unbiased_exponent(bs.get_unbiased_exponent() + dg); + return bs.get_val(); } } // namespace __llvm_libc diff --git a/libc/src/math/generic/expf.cpp b/libc/src/math/generic/expf.cpp --- a/libc/src/math/generic/expf.cpp +++ b/libc/src/math/generic/expf.cpp @@ -7,100 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/math/expf.h" -#include "common_constants.h" // Lookup tables EXP_M1 and EXP_M2. -#include "src/__support/FPUtil/BasicOperations.h" -#include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FMA.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/PolyEval.h" -#include "src/__support/common.h" - -#include +#include "src/math/generic/expxf.h" namespace __llvm_libc { -LLVM_LIBC_FUNCTION(float, expf, (float x)) { - using FPBits = typename fputil::FPBits; - FPBits xbits(x); - - uint32_t x_u = xbits.uintval(); - uint32_t x_abs = x_u & 0x7fff'ffffU; - - // Exceptional values - if (unlikely(x_u == 0xc236'bd8cU)) { // x = -0x1.6d7b18p+5f - return 0x1.108a58p-66f - x * 0x1.0p-95f; - } - - // When |x| >= 89, |x| < 2^-25, or x is nan - if (unlikely(x_abs >= 0x42b2'0000U || x_abs <= 0x3280'0000U)) { - // |x| < 2^-25 - if (xbits.get_unbiased_exponent() <= 101) { - return 1.0f + x; - } - - // When x < log(2^-150) or nan - if (xbits.uintval() >= 0xc2cf'f1b5U) { - // exp(-Inf) = 0 - if (xbits.is_inf()) - return 0.0f; - // exp(nan) = nan - if (xbits.is_nan()) - return x; - if (fputil::get_round() == FE_UPWARD) - return static_cast(FPBits(FPBits::MIN_SUBNORMAL)); - errno = ERANGE; - return 0.0f; - } - // x >= 89 or nan - if (!xbits.get_sign() && (xbits.uintval() >= 0x42b2'0000)) { - // x is finite - if (xbits.uintval() < 0x7f80'0000U) { - int rounding = fputil::get_round(); - if (rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO) - return static_cast(FPBits(FPBits::MAX_NORMAL)); - - errno = ERANGE; - } - // x is +inf or nan - return x + static_cast(FPBits::inf()); - } - } - // For -104 < x < 89, to compute exp(x), we perform the following range - // reduction: find hi, mid, lo such that: - // x = hi + mid + lo, in which - // hi is an integer, - // mid * 2^7 is an integer - // -2^(-8) <= lo < 2^-8. - // In particular, - // hi + mid = round(x * 2^7) * 2^(-7). - // Then, - // exp(x) = exp(hi + mid + lo) = exp(hi) * exp(mid) * exp(lo). - // We store exp(hi) and exp(mid) in the lookup tables EXP_M1 and EXP_M2 - // respectively. exp(lo) is computed using a degree-4 minimax polynomial - // generated by Sollya. - - // x_hi = (hi + mid) * 2^7 = round(x * 2^7). - // The default rounding mode for float-to-int conversion in C++ is - // round-toward-zero. To make it round-to-nearest, we add (-1)^sign(x) * 0.5 - // before conversion. - int x_hi = static_cast(x * 0x1.0p7f + (xbits.get_sign() ? -0.5f : 0.5f)); - // Subtract (hi + mid) from x to get lo. - x -= static_cast(x_hi) * 0x1.0p-7f; - double xd = static_cast(x); - x_hi += 104 << 7; - // hi = x_hi >> 7 - double exp_hi = EXP_M1[x_hi >> 7]; - // mid * 2^7 = x_hi & 0x0000'007fU; - double exp_mid = EXP_M2[x_hi & 0x7f]; - // Degree-4 minimax polynomial generated by Sollya with the following - // commands: - // > display = hexadecimal; - // > Q = fpminimax(expm1(x)/x, 3, [|D...|], [-2^-8, 2^-8]); - // > Q; - double exp_lo = - fputil::polyeval(xd, 0x1p0, 0x1.ffffffffff777p-1, 0x1.000000000071cp-1, - 0x1.555566668e5e7p-3, 0x1.55555555ef243p-5); - return static_cast(exp_hi * exp_mid * exp_lo); -} +LLVM_LIBC_FUNCTION(float, expf, (float x)) { return expxf(x); } } // namespace __llvm_libc diff --git a/libc/src/math/generic/expm1f.cpp b/libc/src/math/generic/expm1f.cpp --- a/libc/src/math/generic/expm1f.cpp +++ b/libc/src/math/generic/expm1f.cpp @@ -7,151 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/math/expm1f.h" -#include "common_constants.h" // Lookup tables EXP_M1 and EXP_M2. -#include "src/__support/FPUtil/BasicOperations.h" -#include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FMA.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/PolyEval.h" -#include "src/__support/common.h" - -#include +#include "src/math/generic/expxf.h" namespace __llvm_libc { -LLVM_LIBC_FUNCTION(float, expm1f, (float x)) { - using FPBits = typename fputil::FPBits; - FPBits xbits(x); - - uint32_t x_u = xbits.uintval(); - uint32_t x_abs = x_u & 0x7fff'ffffU; - - // Exceptional value - if (unlikely(x_u == 0x3e35'bec5U)) { // x = 0x1.6b7d8ap-3f - int round_mode = fputil::get_round(); - if (round_mode == FE_TONEAREST || round_mode == FE_UPWARD) - return 0x1.8dbe64p-3f; - return 0x1.8dbe62p-3f; - } - -#if !defined(LIBC_TARGET_HAS_FMA) - if (unlikely(x_u == 0xbdc1'c6cbU)) { // x = -0x1.838d96p-4f - int round_mode = fputil::get_round(); - if (round_mode == FE_TONEAREST || round_mode == FE_DOWNWARD) - return -0x1.71c884p-4f; - return -0x1.71c882p-4f; - } -#endif // LIBC_TARGET_HAS_FMA - - // When |x| > 25*log(2), or nan - if (unlikely(x_abs >= 0x418a'a123U)) { - // x < log(2^-25) - if (xbits.get_sign()) { - // exp(-Inf) = 0 - if (xbits.is_inf()) - return -1.0f; - // exp(nan) = nan - if (xbits.is_nan()) - return x; - int round_mode = fputil::get_round(); - if (round_mode == FE_UPWARD || round_mode == FE_TOWARDZERO) - return -0x1.ffff'fep-1f; // -1.0f + 0x1.0p-24f - return -1.0f; - } else { - // x >= 89 or nan - if (xbits.uintval() >= 0x42b2'0000) { - if (xbits.uintval() < 0x7f80'0000U) { - int rounding = fputil::get_round(); - if (rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO) - return static_cast(FPBits(FPBits::MAX_NORMAL)); - - errno = ERANGE; - } - return x + static_cast(FPBits::inf()); - } - } - } - - // |x| < 2^-4 - if (x_abs < 0x3d80'0000U) { - // |x| < 2^-25 - if (x_abs < 0x3300'0000U) { - // x = -0.0f - if (unlikely(xbits.uintval() == 0x8000'0000U)) - return x; - // When |x| < 2^-25, the relative error of the approximation e^x - 1 ~ x - // is: - // |(e^x - 1) - x| / |e^x - 1| < |x^2| / |x| - // = |x| - // < 2^-25 - // < epsilon(1)/2. - // So the correctly rounded values of expm1(x) are: - // = x + eps(x) if rounding mode = FE_UPWARD, - // or (rounding mode = FE_TOWARDZERO and x is - // negative), - // = x otherwise. - // To simplify the rounding decision and make it more efficient, we use - // fma(x, x, x) ~ x + x^2 instead. - // Note: to use the formula x + x^2 to decide the correct rounding, we - // do need fma(x, x, x) to prevent underflow caused by x*x when |x| < - // 2^-76. For targets without FMA instructions, we simply use double for - // intermediate results as it is more efficient than using an emulated - // version of FMA. -#if defined(LIBC_TARGET_HAS_FMA) - return fputil::fma(x, x, x); -#else - double xd = x; - return static_cast(fputil::multiply_add(xd, xd, xd)); -#endif // LIBC_TARGET_HAS_FMA - } - - // 2^-25 <= |x| < 2^-4 - double xd = static_cast(x); - double xsq = xd * xd; - // Degree-8 minimax polynomial generated by Sollya with: - // > display = hexadecimal; - // > P = fpminimax((expm1(x) - x)/x^2, 6, [|D...|], [-2^-4, 2^-4]); - double r = - fputil::polyeval(xd, 0x1p-1, 0x1.55555555557ddp-3, 0x1.55555555552fap-5, - 0x1.111110fcd58b7p-7, 0x1.6c16c1717660bp-10, - 0x1.a0241f0006d62p-13, 0x1.a01e3f8d3c06p-16); - return static_cast(fputil::multiply_add(r, xsq, xd)); - } - - // For -18 < x < 89, to compute expm1(x), we perform the following range - // reduction: find hi, mid, lo such that: - // x = hi + mid + lo, in which - // hi is an integer, - // mid * 2^7 is an integer - // -2^(-8) <= lo < 2^-8. - // In particular, - // hi + mid = round(x * 2^7) * 2^(-7). - // Then, - // expm1(x) = exp(hi + mid + lo) - 1 = exp(hi) * exp(mid) * exp(lo) - 1. - // We store exp(hi) and exp(mid) in the lookup tables EXP_M1 and EXP_M2 - // respectively. exp(lo) is computed using a degree-4 minimax polynomial - // generated by Sollya. - - // x_hi = hi + mid. - int x_hi = static_cast(x * 0x1.0p7f + (xbits.get_sign() ? -0.5f : 0.5f)); - // Subtract (hi + mid) from x to get lo. - x -= static_cast(x_hi) * 0x1.0p-7f; - double xd = static_cast(x); - x_hi += 104 << 7; - // hi = x_hi >> 7 - double exp_hi = EXP_M1[x_hi >> 7]; - // lo = x_hi & 0x0000'007fU; - double exp_mid = EXP_M2[x_hi & 0x7f]; - double exp_hi_mid = exp_hi * exp_mid; - // Degree-4 minimax polynomial generated by Sollya with the following - // commands: - // > display = hexadecimal; - // > Q = fpminimax(expm1(x)/x, 3, [|D...|], [-2^-8, 2^-8]); - // > Q; - double exp_lo = - fputil::polyeval(xd, 0x1.0p0, 0x1.ffffffffff777p-1, 0x1.000000000071cp-1, - 0x1.555566668e5e7p-3, 0x1.55555555ef243p-5); - return static_cast(fputil::multiply_add(exp_hi_mid, exp_lo, -1.0)); -} +LLVM_LIBC_FUNCTION(float, expm1f, (float x)) { return expxf(x); } } // namespace __llvm_libc diff --git a/libc/src/math/generic/expxf.h b/libc/src/math/generic/expxf.h new file mode 100644 --- /dev/null +++ b/libc/src/math/generic/expxf.h @@ -0,0 +1,116 @@ +//===-- Single-precision general exp function -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_GENERIC_EXPXF_H +#define LLVM_LIBC_SRC_MATH_GENERIC_EXPXF_H + +#include "common_constants.h" // Lookup tables EXP_M +#include "math_utils.h" +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/common.h" +#include + +#include + +namespace __llvm_libc { + +static constexpr double ln2inv = 1.44269504088896340735L * EXP_num_p; +static constexpr double ln2h = + 0.693147180558298714458942413330078125L / EXP_num_p; +static constexpr double ln2l = + 1.646594958289708128098443075500134360E-12L / EXP_num_p; + +template static inline float expxf(float x) { + using FPBits = typename fputil::FPBits; + FPBits xbits(x); + + uint32_t x_u = xbits.uintval(); + uint32_t x_abs = x_u & 0x7fff'ffffU; + + // When |x| >= 89, |x| < 2^-25, or x is nan + if (unlikely(x_abs >= 0x42b2'0000U || x_abs <= 0x3280'0000U)) { + // |x| < 2^-25 + if constexpr (!EXPM1) { + if (xbits.get_unbiased_exponent() <= 101) { + return 1.0f + x; + } + } else { + if (xbits.get_unbiased_exponent() <= 101) { + return unlikely(x_abs == 0) ? x : (x + 0.5 * x * x); + } + } + + // When x < log(2^-150) or nan + if (xbits.uintval() >= 0xc2cf'f1b5U) { + // exp(-Inf) = 0 + if (xbits.is_inf()) { + if constexpr (!EXPM1) { + return 0.0f; + } else { + return -1.0f; + } + } + // exp(nan) = nan + if (xbits.is_nan()) + return x; + if constexpr (!EXPM1) { + if (unlikely(fputil::get_round() == FE_UPWARD)) + return FPBits(FPBits::MIN_SUBNORMAL).get_val(); + errno = ERANGE; + return 0.0f; + } else { + return -1.0f + opt_barrier(FPBits(FPBits::MIN_NORMAL).get_val()); + } + } + // x >= 89 or nan + if (!xbits.get_sign() && (xbits.uintval() >= 0x42b2'0000)) { + // x is finite + if (xbits.uintval() < 0x7f80'0000U) { + int rounding = fputil::get_round(); + if (unlikely(rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO)) + return FPBits(FPBits::MAX_NORMAL).get_val(); + + errno = ERANGE; + } + // x is +inf or nan + return x + FPBits::inf().get_val(); + } + } + + double xdbl = x; + int ps = ln2inv * xdbl + 16384.5; + double psx = ps - 16384.0; + ps += 1 << (EXP_bits_p - 1); + int dg = (ps >> EXP_bits_p) - (16384 >> EXP_bits_p); + double ml = EXP_M[ps & (EXP_num_p - 1)]; + double dx = (xdbl - psx * ln2h) - psx * ln2l; + double pe = dx * fputil::polyeval(dx, 1.0, 1.0 / 2.0, 1.0 / 6.0, 1.0 / 24.0, + 1.0 / 120.0, 1.0 / 720.0); + + if constexpr (!EXPM1) { + double result = ml * pe + pe + ml + 1.0; + fputil::FPBits bs(result); + bs.set_unbiased_exponent(bs.get_unbiased_exponent() + dg); + return bs.get_val(); + } else { + if (dg == 0) { + return ml * pe + pe + ml; + } else { + double result = ml * pe + pe + ml + 1.0; + fputil::FPBits bs(result); + bs.set_unbiased_exponent(bs.get_unbiased_exponent() + dg); + return bs.get_val() - 1.0; + } + } +} + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_MATH_GENERIC_EXPXF_H diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt --- a/libc/test/src/math/CMakeLists.txt +++ b/libc/test/src/math/CMakeLists.txt @@ -1196,7 +1196,7 @@ add_fp_unittest( expm1f_test NEED_MPFR - SUITE + SUITE libc_math_unittests SRCS expm1f_test.cpp diff --git a/libc/test/src/math/exp2f_test.cpp b/libc/test/src/math/exp2f_test.cpp --- a/libc/test/src/math/exp2f_test.cpp +++ b/libc/test/src/math/exp2f_test.cpp @@ -54,18 +54,18 @@ TEST(LlvmLibcExp2fTest, TrickyInputs) { constexpr int N = 12; constexpr uint32_t INPUTS[N] = { - 0x3b429d37U, /*0x1.853a6ep-9f*/ - 0x3c02a9adU, /*0x1.05535ap-7f*/ - 0x3ca66e26U, /*0x1.4cdc4cp-6f*/ - 0x3d92a282U, /*0x1.254504p-4f*/ - 0x42fa0001U, /*0x1.f40002p+6f*/ - 0x42ffffffU, /*0x1.fffffep+6f*/ - 0xb8d3d026U, /*-0x1.a7a04cp-14f*/ - 0xbcf3a937U, /*-0x1.e7526ep-6f*/ - 0xc2fa0001U, /*-0x1.f40002p+6f*/ - 0xc2fc0000U, /*-0x1.f8p+6f*/ - 0xc2fc0001U, /*-0x1.f80002p+6f*/ - 0xc3150000U, /*-0x1.2ap+7f*/ + 0x3b429d37U, // 0x1.853a6ep-9f + 0x3c02a9adU, // 0x1.05535ap-7f + 0x3ca66e26U, // 0x1.4cdc4cp-6f + 0x3d92a282U, // 0x1.254504p-4f + 0x42fa0001U, // 0x1.f40002p+6f + 0x42ffffffU, // 0x1.fffffep+6f + 0xb8d3d026U, // -0x1.a7a04cp-14f + 0xbcf3a937U, // -0x1.e7526ep-6f + 0xc2fa0001U, // -0x1.f40002p+6f + 0xc2fc0000U, // -0x1.f8p+6f + 0xc2fc0001U, // -0x1.f80002p+6f + 0xc3150000U, // -0x1.2ap+7f }; for (int i = 0; i < N; ++i) { errno = 0; diff --git a/libc/test/src/math/expf_test.cpp b/libc/test/src/math/expf_test.cpp --- a/libc/test/src/math/expf_test.cpp +++ b/libc/test/src/math/expf_test.cpp @@ -73,6 +73,22 @@ float x; errno = 0; + x = float(FPBits(0xbbf0edf1U)); + ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Exp, x, __llvm_libc::expf(x), + 0.5); + EXPECT_MATH_ERRNO(0); + + errno = 0; + x = float(FPBits(0xbbf0'edf1U)); + ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Exp, x, __llvm_libc::expf(x), + 0.5); + EXPECT_MATH_ERRNO(0); + + x = float(FPBits(0xc169'12cdU)); + ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Exp, x, __llvm_libc::expf(x), + 0.5); + EXPECT_MATH_ERRNO(0); + x = float(FPBits(0x42affff8U)); ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Exp, x, __llvm_libc::expf(x), 0.5); diff --git a/libc/test/src/math/expm1f_test.cpp b/libc/test/src/math/expm1f_test.cpp --- a/libc/test/src/math/expm1f_test.cpp +++ b/libc/test/src/math/expm1f_test.cpp @@ -68,6 +68,17 @@ float x; errno = 0; + x = -120.0; + ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Expm1, x, + __llvm_libc::expm1f(x), 0.5); + EXPECT_MATH_ERRNO(0); + + errno = 0; + x = float(FPBits(0x989e18e7U)); + ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Expm1, x, + __llvm_libc::expm1f(x), 0.5); + EXPECT_MATH_ERRNO(0); + x = float(FPBits(0x42affff8U)); ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Expm1, x, __llvm_libc::expm1f(x), 0.5);