| 
 | 1 | +//===-- Implementation header for cos ---------------------------*- C++ -*-===//  | 
 | 2 | +//  | 
 | 3 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.  | 
 | 4 | +// See https://llvm.org/LICENSE.txt for license information.  | 
 | 5 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception  | 
 | 6 | +//  | 
 | 7 | +//===----------------------------------------------------------------------===//  | 
 | 8 | + | 
 | 9 | +#ifndef LIBC_SRC___SUPPORT_MATH_COS_H  | 
 | 10 | +#define LIBC_SRC___SUPPORT_MATH_COS_H  | 
 | 11 | + | 
 | 12 | +#include "range_reduction_double_common.h"  | 
 | 13 | +#include "sincos_eval.h"  | 
 | 14 | +#include "src/__support/FPUtil/FEnvImpl.h"  | 
 | 15 | +#include "src/__support/FPUtil/FPBits.h"  | 
 | 16 | +#include "src/__support/FPUtil/double_double.h"  | 
 | 17 | +#include "src/__support/FPUtil/dyadic_float.h"  | 
 | 18 | +#include "src/__support/FPUtil/except_value_utils.h"  | 
 | 19 | +#include "src/__support/macros/config.h"  | 
 | 20 | +#include "src/__support/macros/optimization.h"            // LIBC_UNLIKELY  | 
 | 21 | +#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA  | 
 | 22 | + | 
 | 23 | +#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE  | 
 | 24 | +#include "range_reduction_double_fma.h"  | 
 | 25 | +#else  | 
 | 26 | +#include "range_reduction_double_nofma.h"  | 
 | 27 | +#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE  | 
 | 28 | + | 
 | 29 | +namespace LIBC_NAMESPACE_DECL {  | 
 | 30 | + | 
 | 31 | +namespace math {  | 
 | 32 | + | 
 | 33 | +LIBC_INLINE static constexpr double cos(double x) {  | 
 | 34 | +  using namespace range_reduction_double_internal;  | 
 | 35 | +  using DoubleDouble = fputil::DoubleDouble;  | 
 | 36 | +  using FPBits = typename fputil::FPBits<double>;  | 
 | 37 | +  FPBits xbits(x);  | 
 | 38 | + | 
 | 39 | +  uint16_t x_e = xbits.get_biased_exponent();  | 
 | 40 | + | 
 | 41 | +  DoubleDouble y;  | 
 | 42 | +  unsigned k = 0;  | 
 | 43 | +  LargeRangeReduction range_reduction_large;  | 
 | 44 | + | 
 | 45 | +  // |x| < 2^16.  | 
 | 46 | +  if (LIBC_LIKELY(x_e < FPBits::EXP_BIAS + FAST_PASS_EXPONENT)) {  | 
 | 47 | +    // |x| < 2^-7  | 
 | 48 | +    if (LIBC_UNLIKELY(x_e < FPBits::EXP_BIAS - 7)) {  | 
 | 49 | +      // |x| < 2^-27  | 
 | 50 | +      if (LIBC_UNLIKELY(x_e < FPBits::EXP_BIAS - 27)) {  | 
 | 51 | +        // Signed zeros.  | 
 | 52 | +        if (LIBC_UNLIKELY(x == 0.0))  | 
 | 53 | +          return 1.0;  | 
 | 54 | + | 
 | 55 | +        // For |x| < 2^-27, |cos(x) - 1| < |x|^2/2 < 2^-54 = ulp(1 - 2^-53)/2.  | 
 | 56 | +        return fputil::round_result_slightly_down(1.0);  | 
 | 57 | +      }  | 
 | 58 | +      // No range reduction needed.  | 
 | 59 | +      k = 0;  | 
 | 60 | +      y.lo = 0.0;  | 
 | 61 | +      y.hi = x;  | 
 | 62 | +    } else {  | 
 | 63 | +      // Small range reduction.  | 
 | 64 | +      k = range_reduction_small(x, y);  | 
 | 65 | +    }  | 
 | 66 | +  } else {  | 
 | 67 | +    // Inf or NaN  | 
 | 68 | +    if (LIBC_UNLIKELY(x_e > 2 * FPBits::EXP_BIAS)) {  | 
 | 69 | +      if (xbits.is_signaling_nan()) {  | 
 | 70 | +        fputil::raise_except_if_required(FE_INVALID);  | 
 | 71 | +        return FPBits::quiet_nan().get_val();  | 
 | 72 | +      }  | 
 | 73 | +      // cos(+-Inf) = NaN  | 
 | 74 | +      if (xbits.get_mantissa() == 0) {  | 
 | 75 | +        fputil::set_errno_if_required(EDOM);  | 
 | 76 | +        fputil::raise_except_if_required(FE_INVALID);  | 
 | 77 | +      }  | 
 | 78 | +      return x + FPBits::quiet_nan().get_val();  | 
 | 79 | +    }  | 
 | 80 | + | 
 | 81 | +    // Large range reduction.  | 
 | 82 | +    k = range_reduction_large.fast(x, y);  | 
 | 83 | +  }  | 
 | 84 | + | 
 | 85 | +  DoubleDouble sin_y, cos_y;  | 
 | 86 | + | 
 | 87 | +  [[maybe_unused]] double err =  | 
 | 88 | +      math::sincos_eval_internal::sincos_eval(y, sin_y, cos_y);  | 
 | 89 | + | 
 | 90 | +  // Look up sin(k * pi/128) and cos(k * pi/128)  | 
 | 91 | +#ifdef LIBC_MATH_HAS_SMALL_TABLES  | 
 | 92 | +  // Memory saving versions.  Use 65-entry table.  | 
 | 93 | +  auto get_idx_dd = [](unsigned kk) -> DoubleDouble {  | 
 | 94 | +    unsigned idx = (kk & 64) ? 64 - (kk & 63) : (kk & 63);  | 
 | 95 | +    DoubleDouble ans = SIN_K_PI_OVER_128[idx];  | 
 | 96 | +    if (kk & 128) {  | 
 | 97 | +      ans.hi = -ans.hi;  | 
 | 98 | +      ans.lo = -ans.lo;  | 
 | 99 | +    }  | 
 | 100 | +    return ans;  | 
 | 101 | +  };  | 
 | 102 | +  DoubleDouble msin_k = get_idx_dd(k + 128);  | 
 | 103 | +  DoubleDouble cos_k = get_idx_dd(k + 64);  | 
 | 104 | +#else  | 
 | 105 | +  // Fast look up version, but needs 256-entry table.  | 
 | 106 | +  // -sin(k * pi/128) = sin((k + 128) * pi/128)  | 
 | 107 | +  // cos(k * pi/128) = sin(k * pi/128 + pi/2) = sin((k + 64) * pi/128).  | 
 | 108 | +  DoubleDouble msin_k = SIN_K_PI_OVER_128[(k + 128) & 255];  | 
 | 109 | +  DoubleDouble cos_k = SIN_K_PI_OVER_128[(k + 64) & 255];  | 
 | 110 | +#endif // LIBC_MATH_HAS_SMALL_TABLES  | 
 | 111 | + | 
 | 112 | +  // After range reduction, k = round(x * 128 / pi) and y = x - k * (pi / 128).  | 
 | 113 | +  // So k is an integer and -pi / 256 <= y <= pi / 256.  | 
 | 114 | +  // Then cos(x) = cos((k * pi/128 + y)  | 
 | 115 | +  //             = cos(y) * cos(k*pi/128) - sin(y) * sin(k*pi/128)  | 
 | 116 | +  DoubleDouble cos_k_cos_y = fputil::quick_mult(cos_y, cos_k);  | 
 | 117 | +  DoubleDouble msin_k_sin_y = fputil::quick_mult(sin_y, msin_k);  | 
 | 118 | + | 
 | 119 | +  DoubleDouble rr = fputil::exact_add<false>(cos_k_cos_y.hi, msin_k_sin_y.hi);  | 
 | 120 | +  rr.lo += msin_k_sin_y.lo + cos_k_cos_y.lo;  | 
 | 121 | + | 
 | 122 | +#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS  | 
 | 123 | +  return rr.hi + rr.lo;  | 
 | 124 | +#else  | 
 | 125 | +  using Float128 = typename fputil::DyadicFloat<128>;  | 
 | 126 | +  double rlp = rr.lo + err;  | 
 | 127 | +  double rlm = rr.lo - err;  | 
 | 128 | + | 
 | 129 | +  double r_upper = rr.hi + rlp; // (rr.lo + ERR);  | 
 | 130 | +  double r_lower = rr.hi + rlm; // (rr.lo - ERR);  | 
 | 131 | + | 
 | 132 | +  // Ziv's rounding test.  | 
 | 133 | +  if (LIBC_LIKELY(r_upper == r_lower))  | 
 | 134 | +    return r_upper;  | 
 | 135 | + | 
 | 136 | +  Float128 u_f128, sin_u, cos_u;  | 
 | 137 | +  if (LIBC_LIKELY(x_e < FPBits::EXP_BIAS + FAST_PASS_EXPONENT))  | 
 | 138 | +    u_f128 = range_reduction_small_f128(x);  | 
 | 139 | +  else  | 
 | 140 | +    u_f128 = range_reduction_large.accurate();  | 
 | 141 | + | 
 | 142 | +  math::sincos_eval_internal::sincos_eval(u_f128, sin_u, cos_u);  | 
 | 143 | + | 
 | 144 | +  auto get_sin_k = [](unsigned kk) -> Float128 {  | 
 | 145 | +    unsigned idx = (kk & 64) ? 64 - (kk & 63) : (kk & 63);  | 
 | 146 | +    Float128 ans = SIN_K_PI_OVER_128_F128[idx];  | 
 | 147 | +    if (kk & 128)  | 
 | 148 | +      ans.sign = Sign::NEG;  | 
 | 149 | +    return ans;  | 
 | 150 | +  };  | 
 | 151 | + | 
 | 152 | +  // -sin(k * pi/128) = sin((k + 128) * pi/128)  | 
 | 153 | +  // cos(k * pi/128) = sin(k * pi/128 + pi/2) = sin((k + 64) * pi/128).  | 
 | 154 | +  Float128 msin_k_f128 = get_sin_k(k + 128);  | 
 | 155 | +  Float128 cos_k_f128 = get_sin_k(k + 64);  | 
 | 156 | + | 
 | 157 | +  // cos(x) = cos((k * pi/128 + u)  | 
 | 158 | +  //        = cos(u) * cos(k*pi/128) - sin(u) * sin(k*pi/128)  | 
 | 159 | +  Float128 r = fputil::quick_add(fputil::quick_mul(cos_k_f128, cos_u),  | 
 | 160 | +                                 fputil::quick_mul(msin_k_f128, sin_u));  | 
 | 161 | + | 
 | 162 | +  // TODO: Add assertion if Ziv's accuracy tests fail in debug mode.  | 
 | 163 | +  // https://github.com/llvm/llvm-project/issues/96452.  | 
 | 164 | + | 
 | 165 | +  return static_cast<double>(r);  | 
 | 166 | +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS  | 
 | 167 | +}  | 
 | 168 | + | 
 | 169 | +} // namespace math  | 
 | 170 | + | 
 | 171 | +} // namespace LIBC_NAMESPACE_DECL  | 
 | 172 | + | 
 | 173 | +#endif // LIBC_SRC___SUPPORT_MATH_COS_H  | 
0 commit comments