| 
 | 1 | +//===-- Implementation header for exp10m1f ----------------------*- C++ -*-===//  | 
 | 2 | +//  | 
 | 3 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.  | 
 | 4 | +// See https://llvm.org/LICENSE.txt for license information.  | 
 | 5 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception  | 
 | 6 | +//  | 
 | 7 | +//===----------------------------------------------------------------------===//  | 
 | 8 | + | 
 | 9 | +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXP10M1F_H  | 
 | 10 | +#define LLVM_LIBC_SRC___SUPPORT_MATH_EXP10M1F_H  | 
 | 11 | + | 
 | 12 | +#include "exp10f_utils.h"  | 
 | 13 | +#include "src/__support/FPUtil/FEnvImpl.h"  | 
 | 14 | +#include "src/__support/FPUtil/FPBits.h"  | 
 | 15 | +#include "src/__support/FPUtil/PolyEval.h"  | 
 | 16 | +#include "src/__support/FPUtil/except_value_utils.h"  | 
 | 17 | +#include "src/__support/FPUtil/multiply_add.h"  | 
 | 18 | +#include "src/__support/FPUtil/rounding_mode.h"  | 
 | 19 | +#include "src/__support/common.h"  | 
 | 20 | +#include "src/__support/libc_errno.h"  | 
 | 21 | +#include "src/__support/macros/config.h"  | 
 | 22 | +#include "src/__support/macros/optimization.h"  | 
 | 23 | + | 
 | 24 | +namespace LIBC_NAMESPACE_DECL {  | 
 | 25 | + | 
 | 26 | +namespace math {  | 
 | 27 | + | 
 | 28 | +namespace exp10m1f_internal {  | 
 | 29 | + | 
 | 30 | +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS  | 
 | 31 | +static constexpr size_t N_EXCEPTS_LO = 11;  | 
 | 32 | + | 
 | 33 | +static constexpr fputil::ExceptValues<float, N_EXCEPTS_LO> EXP10M1F_EXCEPTS_LO =  | 
 | 34 | +    {{  | 
 | 35 | +        // x = 0x1.0fe54ep-11, exp10m1f(x) = 0x1.3937eep-10 (RZ)  | 
 | 36 | +        {0x3a07'f2a7U, 0x3a9c'9bf7U, 1U, 0U, 1U},  | 
 | 37 | +        // x = 0x1.80e6eap-11, exp10m1f(x) = 0x1.bb8272p-10 (RZ)  | 
 | 38 | +        {0x3a40'7375U, 0x3add'c139U, 1U, 0U, 1U},  | 
 | 39 | +        // x = -0x1.2a33bcp-51, exp10m1f(x) = -0x1.57515ep-50 (RZ)  | 
 | 40 | +        {0xa615'19deU, 0xa6ab'a8afU, 0U, 1U, 0U},  | 
 | 41 | +        // x = -0x0p+0, exp10m1f(x) = -0x0p+0 (RZ)  | 
 | 42 | +        {0x8000'0000U, 0x8000'0000U, 0U, 0U, 0U},  | 
 | 43 | +        // x = -0x1.b59e08p-31, exp10m1f(x) = -0x1.f7d356p-30 (RZ)  | 
 | 44 | +        {0xb05a'cf04U, 0xb0fb'e9abU, 0U, 1U, 1U},  | 
 | 45 | +        // x = -0x1.bf342p-12, exp10m1f(x) = -0x1.014e02p-10 (RZ)  | 
 | 46 | +        {0xb9df'9a10U, 0xba80'a701U, 0U, 1U, 0U},  | 
 | 47 | +        // x = -0x1.6207fp-11, exp10m1f(x) = -0x1.9746cap-10 (RZ)  | 
 | 48 | +        {0xba31'03f8U, 0xbacb'a365U, 0U, 1U, 1U},  | 
 | 49 | +        // x = -0x1.bd0c66p-11, exp10m1f(x) = -0x1.ffe168p-10 (RZ)  | 
 | 50 | +        {0xba5e'8633U, 0xbaff'f0b4U, 0U, 1U, 1U},  | 
 | 51 | +        // x = -0x1.ffd84cp-10, exp10m1f(x) = -0x1.25faf2p-8 (RZ)  | 
 | 52 | +        {0xbaff'ec26U, 0xbb92'fd79U, 0U, 1U, 0U},  | 
 | 53 | +        // x = -0x1.a74172p-9, exp10m1f(x) = -0x1.e57be2p-8 (RZ)  | 
 | 54 | +        {0xbb53'a0b9U, 0xbbf2'bdf1U, 0U, 1U, 1U},  | 
 | 55 | +        // x = -0x1.cb694cp-9, exp10m1f(x) = -0x1.0764e4p-7 (RZ)  | 
 | 56 | +        {0xbb65'b4a6U, 0xbc03'b272U, 0U, 1U, 0U},  | 
 | 57 | +    }};  | 
 | 58 | + | 
 | 59 | +static constexpr size_t N_EXCEPTS_HI = 19;  | 
 | 60 | + | 
 | 61 | +static constexpr fputil::ExceptValues<float, N_EXCEPTS_HI> EXP10M1F_EXCEPTS_HI =  | 
 | 62 | +    {{  | 
 | 63 | +        // (input, RZ output, RU offset, RD offset, RN offset)  | 
 | 64 | +        // x = 0x1.8d31eep-8, exp10m1f(x) = 0x1.cc7e4cp-7 (RZ)  | 
 | 65 | +        {0x3bc6'98f7U, 0x3c66'3f26U, 1U, 0U, 1U},  | 
 | 66 | +        // x = 0x1.915fcep-8, exp10m1f(x) = 0x1.d15f72p-7 (RZ)  | 
 | 67 | +        {0x3bc8'afe7U, 0x3c68'afb9U, 1U, 0U, 0U},  | 
 | 68 | +        // x = 0x1.bcf982p-8, exp10m1f(x) = 0x1.022928p-6 (RZ)  | 
 | 69 | +        {0x3bde'7cc1U, 0x3c81'1494U, 1U, 0U, 1U},  | 
 | 70 | +        // x = 0x1.99ff0ap-7, exp10m1f(x) = 0x1.dee416p-6 (RZ)  | 
 | 71 | +        {0x3c4c'ff85U, 0x3cef'720bU, 1U, 0U, 0U},  | 
 | 72 | +        // x = 0x1.75ea14p-6, exp10m1f(x) = 0x1.b9ff16p-5 (RZ)  | 
 | 73 | +        {0x3cba'f50aU, 0x3d5c'ff8bU, 1U, 0U, 0U},  | 
 | 74 | +        // x = 0x1.f81b64p-6, exp10m1f(x) = 0x1.2cb6bcp-4 (RZ)  | 
 | 75 | +        {0x3cfc'0db2U, 0x3d96'5b5eU, 1U, 0U, 0U},  | 
 | 76 | +        // x = 0x1.fafecp+3, exp10m1f(x) = 0x1.8c880ap+52 (RZ)  | 
 | 77 | +        {0x417d'7f60U, 0x59c6'4405U, 1U, 0U, 0U},  | 
 | 78 | +        // x = -0x1.3bf094p-8, exp10m1f(x) = -0x1.69ba4ap-7 (RZ)  | 
 | 79 | +        {0xbb9d'f84aU, 0xbc34'dd25U, 0U, 1U, 0U},  | 
 | 80 | +        // x = -0x1.4558bcp-8, exp10m1f(x) = -0x1.746fb8p-7 (RZ)  | 
 | 81 | +        {0xbba2'ac5eU, 0xbc3a'37dcU, 0U, 1U, 1U},  | 
 | 82 | +        // x = -0x1.4bb43p-8, exp10m1f(x) = -0x1.7babe4p-7 (RZ)  | 
 | 83 | +        {0xbba5'da18U, 0xbc3d'd5f2U, 0U, 1U, 1U},  | 
 | 84 | +        // x = -0x1.776cc8p-8, exp10m1f(x) = -0x1.ad62c4p-7 (RZ)  | 
 | 85 | +        {0xbbbb'b664U, 0xbc56'b162U, 0U, 1U, 0U},  | 
 | 86 | +        // x = -0x1.f024cp-8, exp10m1f(x) = -0x1.1b20d6p-6 (RZ)  | 
 | 87 | +        {0xbbf8'1260U, 0xbc8d'906bU, 0U, 1U, 1U},  | 
 | 88 | +        // x = -0x1.f510eep-8, exp10m1f(x) = -0x1.1de9aap-6 (RZ)  | 
 | 89 | +        {0xbbfa'8877U, 0xbc8e'f4d5U, 0U, 1U, 0U},  | 
 | 90 | +        // x = -0x1.0b43c4p-7, exp10m1f(x) = -0x1.30d418p-6 (RZ)  | 
 | 91 | +        {0xbc05'a1e2U, 0xbc98'6a0cU, 0U, 1U, 0U},  | 
 | 92 | +        // x = -0x1.245ee4p-7, exp10m1f(x) = -0x1.4d2b86p-6 (RZ)  | 
 | 93 | +        {0xbc12'2f72U, 0xbca6'95c3U, 0U, 1U, 0U},  | 
 | 94 | +        // x = -0x1.f9f2dap-7, exp10m1f(x) = -0x1.1e2186p-5 (RZ)  | 
 | 95 | +        {0xbc7c'f96dU, 0xbd0f'10c3U, 0U, 1U, 0U},  | 
 | 96 | +        // x = -0x1.08e42p-6, exp10m1f(x) = -0x1.2b5c4p-5 (RZ)  | 
 | 97 | +        {0xbc84'7210U, 0xbd15'ae20U, 0U, 1U, 1U},  | 
 | 98 | +        // x = -0x1.0cdc44p-5, exp10m1f(x) = -0x1.2a2152p-4 (RZ)  | 
 | 99 | +        {0xbd06'6e22U, 0xbd95'10a9U, 0U, 1U, 1U},  | 
 | 100 | +        // x = -0x1.ca4322p-5, exp10m1f(x) = -0x1.ef073p-4 (RZ)  | 
 | 101 | +        {0xbd65'2191U, 0xbdf7'8398U, 0U, 1U, 1U},  | 
 | 102 | +    }};  | 
 | 103 | +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS  | 
 | 104 | + | 
 | 105 | +} // namespace exp10m1f_internal  | 
 | 106 | + | 
 | 107 | +LIBC_INLINE static constexpr float exp10m1f(float x) {  | 
 | 108 | +  using namespace exp10m1f_internal;  | 
 | 109 | +  using FPBits = fputil::FPBits<float>;  | 
 | 110 | +  FPBits xbits(x);  | 
 | 111 | + | 
 | 112 | +  uint32_t x_u = xbits.uintval();  | 
 | 113 | +  uint32_t x_abs = x_u & 0x7fff'ffffU;  | 
 | 114 | + | 
 | 115 | +  // When x >= log10(2^128), or x is nan  | 
 | 116 | +  if (LIBC_UNLIKELY(xbits.is_pos() && x_u >= 0x421a'209bU)) {  | 
 | 117 | +    if (xbits.is_finite()) {  | 
 | 118 | +      int rounding = fputil::quick_get_round();  | 
 | 119 | +      if (rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO)  | 
 | 120 | +        return FPBits::max_normal().get_val();  | 
 | 121 | + | 
 | 122 | +      fputil::set_errno_if_required(ERANGE);  | 
 | 123 | +      fputil::raise_except_if_required(FE_OVERFLOW);  | 
 | 124 | +    }  | 
 | 125 | + | 
 | 126 | +    // x >= log10(2^128) and 10^x - 1 rounds to +inf, or x is +inf or nan  | 
 | 127 | +    return x + FPBits::inf().get_val();  | 
 | 128 | +  }  | 
 | 129 | + | 
 | 130 | +  // When |x| <= log10(2) * 2^(-6)  | 
 | 131 | +  if (LIBC_UNLIKELY(x_abs <= 0x3b9a'209bU)) {  | 
 | 132 | +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS  | 
 | 133 | +    if (auto r = EXP10M1F_EXCEPTS_LO.lookup(x_u); LIBC_UNLIKELY(r.has_value()))  | 
 | 134 | +      return r.value();  | 
 | 135 | +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS  | 
 | 136 | + | 
 | 137 | +    double dx = x;  | 
 | 138 | +    double dx_sq = dx * dx;  | 
 | 139 | +    double c0 = dx * Exp10Base::COEFFS[0];  | 
 | 140 | +    double c1 =  | 
 | 141 | +        fputil::multiply_add(dx, Exp10Base::COEFFS[2], Exp10Base::COEFFS[1]);  | 
 | 142 | +    double c2 =  | 
 | 143 | +        fputil::multiply_add(dx, Exp10Base::COEFFS[4], Exp10Base::COEFFS[3]);  | 
 | 144 | +    // 10^dx - 1 ~ (1 + COEFFS[0] * dx + ... + COEFFS[4] * dx^5) - 1  | 
 | 145 | +    //           = COEFFS[0] * dx + ... + COEFFS[4] * dx^5  | 
 | 146 | +    return static_cast<float>(fputil::polyeval(dx_sq, c0, c1, c2));  | 
 | 147 | +  }  | 
 | 148 | + | 
 | 149 | +  // When x <= log10(2^-25), or x is nan  | 
 | 150 | +  if (LIBC_UNLIKELY(x_u >= 0xc0f0d2f1)) {  | 
 | 151 | +    // exp10m1(-inf) = -1  | 
 | 152 | +    if (xbits.is_inf())  | 
 | 153 | +      return -1.0f;  | 
 | 154 | +    // exp10m1(nan) = nan  | 
 | 155 | +    if (xbits.is_nan())  | 
 | 156 | +      return x;  | 
 | 157 | + | 
 | 158 | +    int rounding = fputil::quick_get_round();  | 
 | 159 | +    if (rounding == FE_UPWARD || rounding == FE_TOWARDZERO ||  | 
 | 160 | +        (rounding == FE_TONEAREST && x_u == 0xc0f0d2f1))  | 
 | 161 | +      return -0x1.ffff'fep-1f; // -1.0f + 0x1.0p-24f  | 
 | 162 | +
  | 
 | 163 | +    fputil::set_errno_if_required(ERANGE);  | 
 | 164 | +    fputil::raise_except_if_required(FE_UNDERFLOW);  | 
 | 165 | +    return -1.0f;  | 
 | 166 | +  }  | 
 | 167 | +
  | 
 | 168 | +  // Exact outputs when x = 1, 2, ..., 10.  | 
 | 169 | +  // Quick check mask: 0x800f'ffffU = ~(bits of 1.0f | ... | bits of 10.0f)  | 
 | 170 | +  if (LIBC_UNLIKELY((x_u & 0x800f'ffffU) == 0)) {  | 
 | 171 | +    switch (x_u) {  | 
 | 172 | +    case 0x3f800000U: // x = 1.0f  | 
 | 173 | +      return 9.0f;  | 
 | 174 | +    case 0x40000000U: // x = 2.0f  | 
 | 175 | +      return 99.0f;  | 
 | 176 | +    case 0x40400000U: // x = 3.0f  | 
 | 177 | +      return 999.0f;  | 
 | 178 | +    case 0x40800000U: // x = 4.0f  | 
 | 179 | +      return 9'999.0f;  | 
 | 180 | +    case 0x40a00000U: // x = 5.0f  | 
 | 181 | +      return 99'999.0f;  | 
 | 182 | +    case 0x40c00000U: // x = 6.0f  | 
 | 183 | +      return 999'999.0f;  | 
 | 184 | +    case 0x40e00000U: // x = 7.0f  | 
 | 185 | +      return 9'999'999.0f;  | 
 | 186 | +    case 0x41000000U: { // x = 8.0f  | 
 | 187 | +      int rounding = fputil::quick_get_round();  | 
 | 188 | +      if (rounding == FE_UPWARD || rounding == FE_TONEAREST)  | 
 | 189 | +        return 100'000'000.0f;  | 
 | 190 | +      return 99'999'992.0f;  | 
 | 191 | +    }  | 
 | 192 | +    case 0x41100000U: { // x = 9.0f  | 
 | 193 | +      int rounding = fputil::quick_get_round();  | 
 | 194 | +      if (rounding == FE_UPWARD || rounding == FE_TONEAREST)  | 
 | 195 | +        return 1'000'000'000.0f;  | 
 | 196 | +      return 999'999'936.0f;  | 
 | 197 | +    }  | 
 | 198 | +    case 0x41200000U: { // x = 10.0f  | 
 | 199 | +      int rounding = fputil::quick_get_round();  | 
 | 200 | +      if (rounding == FE_UPWARD || rounding == FE_TONEAREST)  | 
 | 201 | +        return 10'000'000'000.0f;  | 
 | 202 | +      return 9'999'998'976.0f;  | 
 | 203 | +    }  | 
 | 204 | +    }  | 
 | 205 | +  }  | 
 | 206 | + | 
 | 207 | +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS  | 
 | 208 | +  if (auto r = EXP10M1F_EXCEPTS_HI.lookup(x_u); LIBC_UNLIKELY(r.has_value()))  | 
 | 209 | +    return r.value();  | 
 | 210 | +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS  | 
 | 211 | + | 
 | 212 | +  // Range reduction: 10^x = 2^(mid + hi) * 10^lo  | 
 | 213 | +  //   rr = (2^(mid + hi), lo)  | 
 | 214 | +  auto rr = exp_b_range_reduc<Exp10Base>(x);  | 
 | 215 | + | 
 | 216 | +  // The low part is approximated by a degree-5 minimax polynomial.  | 
 | 217 | +  // 10^lo ~ 1 + COEFFS[0] * lo + ... + COEFFS[4] * lo^5  | 
 | 218 | +  double lo_sq = rr.lo * rr.lo;  | 
 | 219 | +  double c0 = fputil::multiply_add(rr.lo, Exp10Base::COEFFS[0], 1.0);  | 
 | 220 | +  double c1 =  | 
 | 221 | +      fputil::multiply_add(rr.lo, Exp10Base::COEFFS[2], Exp10Base::COEFFS[1]);  | 
 | 222 | +  double c2 =  | 
 | 223 | +      fputil::multiply_add(rr.lo, Exp10Base::COEFFS[4], Exp10Base::COEFFS[3]);  | 
 | 224 | +  double exp10_lo = fputil::polyeval(lo_sq, c0, c1, c2);  | 
 | 225 | +  // 10^x - 1 = 2^(mid + hi) * 10^lo - 1  | 
 | 226 | +  //          ~ mh * exp10_lo - 1  | 
 | 227 | +  return static_cast<float>(fputil::multiply_add(exp10_lo, rr.mh, -1.0));  | 
 | 228 | +}  | 
 | 229 | + | 
 | 230 | +} // namespace math  | 
 | 231 | + | 
 | 232 | +} // namespace LIBC_NAMESPACE_DECL  | 
 | 233 | + | 
 | 234 | +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXP10M1F_H  | 
0 commit comments