diff --git a/libc/shared/math.h b/libc/shared/math.h
index e3f7965e19a55..79ba2ea5aa6ff 100644
--- a/libc/shared/math.h
+++ b/libc/shared/math.h
@@ -50,8 +50,14 @@
 #include "math/exp2.h"
 #include "math/exp2f.h"
 #include "math/exp2f16.h"
+#include "math/exp2m1f.h"
+#include "math/exp2m1f16.h"
 #include "math/expf.h"
 #include "math/expf16.h"
+#include "math/expm1.h"
+#include "math/expm1f.h"
+#include "math/expm1f16.h"
+#include "math/fma.h"
 #include "math/frexpf.h"
 #include "math/frexpf128.h"
 #include "math/frexpf16.h"
diff --git a/libc/shared/math/exp2m1f.h b/libc/shared/math/exp2m1f.h
new file mode 100644
index 0000000000000..ca9754774f0fc
--- /dev/null
+++ b/libc/shared/math/exp2m1f.h
@@ -0,0 +1,23 @@
+//===-- Shared exp2m1f function ---------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_EXP2M1F_H
+#define LLVM_LIBC_SHARED_MATH_EXP2M1F_H
+
+#include "shared/libc_common.h"
+#include "src/__support/math/exp2m1f.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::exp2m1f;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SHARED_MATH_EXP2M1F_H
diff --git a/libc/shared/math/exp2m1f16.h b/libc/shared/math/exp2m1f16.h
new file mode 100644
index 0000000000000..96a404708be18
--- /dev/null
+++ b/libc/shared/math/exp2m1f16.h
@@ -0,0 +1,29 @@
+//===-- Shared exp2m1f16 function -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_EXP2M1F16_H
+#define LLVM_LIBC_SHARED_MATH_EXP2M1F16_H
+
+#include "include/llvm-libc-macros/float16-macros.h"
+#include "shared/libc_common.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT16
+
+#include "src/__support/math/exp2m1f16.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::exp2m1f16;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT16
+
+#endif // LLVM_LIBC_SHARED_MATH_EXP2M1F16_H
diff --git a/libc/shared/math/expm1.h b/libc/shared/math/expm1.h
new file mode 100644
index 0000000000000..4c8dbdc013a11
--- /dev/null
+++ b/libc/shared/math/expm1.h
@@ -0,0 +1,23 @@
+//===-- Shared expm1 function -----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_EXPM1_H
+#define LLVM_LIBC_SHARED_MATH_EXPM1_H
+
+#include "shared/libc_common.h"
+#include "src/__support/math/expm1.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::expm1;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SHARED_MATH_EXPM1_H
diff --git a/libc/shared/math/expm1f.h b/libc/shared/math/expm1f.h
new file mode 100644
index 0000000000000..e0cf6a846f116
--- /dev/null
+++ b/libc/shared/math/expm1f.h
@@ -0,0 +1,23 @@
+//===-- Shared expm1f function ----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_EXPM1F_H
+#define LLVM_LIBC_SHARED_MATH_EXPM1F_H
+
+#include "shared/libc_common.h"
+#include "src/__support/math/expm1f.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::expm1f;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SHARED_MATH_EXPM1F_H
diff --git a/libc/shared/math/expm1f16.h b/libc/shared/math/expm1f16.h
new file mode 100644
index 0000000000000..5698400d7066a
--- /dev/null
+++ b/libc/shared/math/expm1f16.h
@@ -0,0 +1,29 @@
+//===-- Shared expm1f16 function --------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_EXPM1F16_H
+#define LLVM_LIBC_SHARED_MATH_EXPM1F16_H
+
+#include "include/llvm-libc-macros/float16-macros.h"
+#include "shared/libc_common.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT16
+
+#include "src/__support/math/expm1f16.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::expm1f16;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT16
+
+#endif // LLVM_LIBC_SHARED_MATH_EXPM1F16_H
diff --git a/libc/shared/math/fma.h b/libc/shared/math/fma.h
new file mode 100644
index 0000000000000..82f1dac61dda2
--- /dev/null
+++ b/libc/shared/math/fma.h
@@ -0,0 +1,23 @@
+//===-- Shared fma function -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_FMA_H
+#define LLVM_LIBC_SHARED_MATH_FMA_H
+
+#include "shared/libc_common.h"
+#include "src/__support/math/fma.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::fma;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SHARED_MATH_FMA_H
diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt
index 9685496c0ec41..1911481d0649e 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -593,6 +593,14 @@ add_header_library(
     libc.src.__support.math.exp10_float16_constants
 )
 
+add_header_library(
+  fma
+  HDRS
+    fma.h
+  DEPENDS
+    libc.src.__support.FPUtil.fma
+)
+
 add_header_library(
   frexpf128
   HDRS
@@ -751,6 +759,42 @@ add_header_library(
     libc.src.__support.macros.optimization
 )
 
+add_header_library(
+  exp2m1f
+  HDRS
+    exp2m1f.h
+  DEPENDS
+    .exp10f_utils
+    libc.src.errno.errno
+    libc.src.__support.common
+    libc.src.__support.FPUtil.except_value_utils
+    libc.src.__support.FPUtil.fenv_impl
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.polyeval
+    libc.src.__support.FPUtil.rounding_mode
+    libc.src.__support.macros.optimization
+    libc.src.__support.macros.properties.cpu_features
+)
+
+add_header_library(
+  exp2m1f16
+  HDRS
+    exp2m1f16.h
+  DEPENDS
+    .expxf16_utils
+    libc.src.__support.common
+    libc.src.__support.FPUtil.cast
+    libc.src.__support.FPUtil.except_value_utils
+    libc.src.__support.FPUtil.fenv_impl
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.polyeval
+    libc.src.__support.FPUtil.rounding_mode
+    libc.src.__support.macros.optimization
+    libc.src.__support.macros.properties.cpu_features
+)
+
 add_header_library(
   exp10
   HDRS
@@ -834,6 +878,59 @@ add_header_library(
     libc.src.__support.macros.properties.cpu_features
 )
 
+add_header_library(
+  expm1
+  HDRS
+    expm1.h
+  DEPENDS
+    .common_constants
+    .exp_constants
+    libc.src.__support.CPP.bit
+    libc.src.__support.FPUtil.dyadic_float
+    libc.src.__support.FPUtil.fenv_impl
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.polyeval
+    libc.src.__support.FPUtil.rounding_mode
+    libc.src.__support.FPUtil.triple_double
+    libc.src.__support.integer_literals
+    libc.src.__support.macros.optimization
+    libc.src.errno.errno
+)
+
+add_header_library(
+  expm1f
+  HDRS
+    expm1f.h
+  DEPENDS
+    .common_constants
+    libc.src.__support.FPUtil.basic_operations
+    libc.src.__support.FPUtil.fenv_impl
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.nearest_integer
+    libc.src.__support.FPUtil.polyeval
+    libc.src.__support.FPUtil.rounding_mode
+    libc.src.__support.macros.optimization
+    libc.src.errno.errno
+)
+
+add_header_library(
+  expm1f16
+  HDRS
+    expm1f16.h
+  DEPENDS
+    .expxf16_utils
+    libc.src.__support.FPUtil.cast
+    libc.src.__support.FPUtil.except_value_utils
+    libc.src.__support.FPUtil.fenv_impl
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.polyeval
+    libc.src.__support.FPUtil.rounding_mode
+    libc.src.__support.macros.optimization
+)
+
 add_header_library(
   range_reduction_double
   HDRS
diff --git a/libc/src/__support/math/exp2m1f.h b/libc/src/__support/math/exp2m1f.h
new file mode 100644
index 0000000000000..e95076c9eac22
--- /dev/null
+++ b/libc/src/__support/math/exp2m1f.h
@@ -0,0 +1,195 @@
+//===-- Implementation header for exp2m1f ------------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXP2M1F_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_EXP2M1F_H
+
+#include "exp10f_utils.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/except_value_utils.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/rounding_mode.h"
+#include "src/__support/common.h"
+#include "src/__support/libc_errno.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h"
+#include "src/__support/macros/properties/cpu_features.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+LIBC_INLINE static constexpr float exp2m1f(float x) {
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+  constexpr size_t N_EXCEPTS_LO = 8;
+
+  constexpr fputil::ExceptValues<float, N_EXCEPTS_LO> EXP2M1F_EXCEPTS_LO = {{
+      // (input, RZ output, RU offset, RD offset, RN offset)
+      // x = 0x1.36dc8ep-36, exp2m1f(x) = 0x1.aef212p-37 (RZ)
+      {0x2d9b'6e47U, 0x2d57'7909U, 1U, 0U, 0U},
+      // x = 0x1.224936p-19, exp2m1f(x) = 0x1.926c0ep-20 (RZ)
+      {0x3611'249bU, 0x35c9'3607U, 1U, 0U, 1U},
+      // x = 0x1.d16d2p-20, exp2m1f(x) = 0x1.429becp-20 (RZ)
+      {0x35e8'b690U, 0x35a1'4df6U, 1U, 0U, 1U},
+      // x = 0x1.17949ep-14, exp2m1f(x) = 0x1.8397p-15 (RZ)
+      {0x388b'ca4fU, 0x3841'cb80U, 1U, 0U, 1U},
+      // x = -0x1.9c3e1ep-38, exp2m1f(x) = -0x1.1dbeacp-38 (RZ)
+      {0xacce'1f0fU, 0xac8e'df56U, 0U, 1U, 0U},
+      // x = -0x1.4d89b4p-32, exp2m1f(x) = -0x1.ce61b6p-33 (RZ)
+      {0xafa6'c4daU, 0xaf67'30dbU, 0U, 1U, 1U},
+      // x = -0x1.a6eac4p-10, exp2m1f(x) = -0x1.24fadap-10 (RZ)
+      {0xbad3'7562U, 0xba92'7d6dU, 0U, 1U, 1U},
+      // x = -0x1.e7526ep-6, exp2m1f(x) = -0x1.4e53dep-6 (RZ)
+      {0xbcf3'a937U, 0xbca7'29efU, 0U, 1U, 1U},
+  }};
+
+  constexpr size_t N_EXCEPTS_HI = 3;
+
+  constexpr fputil::ExceptValues<float, N_EXCEPTS_HI> EXP2M1F_EXCEPTS_HI = {{
+      // (input, RZ output, RU offset, RD offset, RN offset)
+      // x = 0x1.16a972p-1, exp2m1f(x) = 0x1.d545b2p-2 (RZ)
+      {0x3f0b'54b9U, 0x3eea'a2d9U, 1U, 0U, 0U},
+      // x = -0x1.9f12acp-5, exp2m1f(x) = -0x1.1ab68cp-5 (RZ)
+      {0xbd4f'8956U, 0xbd0d'5b46U, 0U, 1U, 0U},
+      // x = -0x1.de7b9cp-5, exp2m1f(x) = -0x1.4508f4p-5 (RZ)
+      {0xbd6f'3dceU, 0xbd22'847aU, 0U, 1U, 1U},
+  }};
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+  using FPBits = fputil::FPBits<float>;
+  FPBits xbits(x);
+
+  uint32_t x_u = xbits.uintval();
+  uint32_t x_abs = x_u & 0x7fff'ffffU;
+
+  // When |x| >= 128, or x is nan, or |x| <= 2^-5
+  if (LIBC_UNLIKELY(x_abs >= 0x4300'0000U || x_abs <= 0x3d00'0000U)) {
+    // |x| <= 2^-5
+    if (x_abs <= 0x3d00'0000U) {
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+      if (auto r = EXP2M1F_EXCEPTS_LO.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
+        return r.value();
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+      // Minimax polynomial generated by Sollya with:
+      // > display = hexadecimal;
+      // > fpminimax((2^x - 1)/x, 5, [|D...|], [-2^-5, 2^-5]);
+      constexpr double COEFFS[] = {
+          0x1.62e42fefa39f3p-1, 0x1.ebfbdff82c57bp-3,  0x1.c6b08d6f2d7aap-5,
+          0x1.3b2ab6fc92f5dp-7, 0x1.5d897cfe27125p-10, 0x1.43090e61e6af1p-13};
+      double xd = x;
+      double xsq = xd * xd;
+      double c0 = fputil::multiply_add(xd, COEFFS[1], COEFFS[0]);
+      double c1 = fputil::multiply_add(xd, COEFFS[3], COEFFS[2]);
+      double c2 = fputil::multiply_add(xd, COEFFS[5], COEFFS[4]);
+      double p = fputil::polyeval(xsq, c0, c1, c2);
+      return static_cast<float>(p * xd);
+    }
+
+    // x >= 128 (including +inf), or x is a nan that reports is_pos()
+    if (xbits.is_pos()) {
+      if (xbits.is_finite()) {
+        int rounding = fputil::quick_get_round();
+        if (rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO)
+          return FPBits::max_normal().get_val();
+
+        fputil::set_errno_if_required(ERANGE);
+        fputil::raise_except_if_required(FE_OVERFLOW);
+      }
+
+      // x >= 128 and 2^x - 1 rounds to +inf, or x is +inf or nan
+      return x + FPBits::inf().get_val();
+    }
+  }
+
+  if (LIBC_UNLIKELY(xbits.is_nan() || x <= -25.0f)) {
+    // 2^(-inf) - 1 = -1
+    if (xbits.is_inf())
+      return -1.0f;
+    // 2^nan - 1 = nan; NaN fails `x <= -25.0f`, so is_nan() is tested above
+    if (xbits.is_nan())
+      return x;
+
+    int rounding = fputil::quick_get_round();
+    if (rounding == FE_UPWARD || rounding == FE_TOWARDZERO)
+      return -0x1.ffff'fep-1f; // -1.0f + 0x1.0p-24f
+
+    fputil::set_errno_if_required(ERANGE);
+    fputil::raise_except_if_required(FE_UNDERFLOW);
+    return -1.0f;
+  }
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+  if (auto r = EXP2M1F_EXCEPTS_HI.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
+    return r.value();
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+  // For -25 < x < 128, to compute 2^x, we perform the following range
+  // reduction: find hi, mid, lo such that:
+  //   x = hi + mid + lo, in which:
+  //     hi is an integer,
+  //     0 <= mid * 2^5 < 32 is an integer,
+  //     -2^(-6) <= lo <= 2^(-6).
+  // In particular,
+  //   hi + mid = round(x * 2^5) * 2^(-5).
+  // Then,
+  //   2^x = 2^(hi + mid + lo) = 2^hi * 2^mid * 2^lo.
+  // 2^mid is stored in the lookup table of 32 elements.
+  // 2^lo is computed using a degree-4 minimax polynomial generated by Sollya.
+  // We perform 2^hi * 2^mid by simply adding hi to the exponent field of 2^mid.
+
+  // kf = (hi + mid) * 2^5 = round(x * 2^5)
+  float kf = 0;
+  int k = 0;
+#ifdef LIBC_TARGET_CPU_HAS_NEAREST_INT
+  kf = fputil::nearest_integer(x * 32.0f);
+  k = static_cast<int>(kf);
+#else
+  constexpr float HALF[2] = {0.5f, -0.5f};
+  k = static_cast<int>(fputil::multiply_add(x, 32.0f, HALF[x < 0.0f]));
+  kf = static_cast<float>(k);
+#endif // LIBC_TARGET_CPU_HAS_NEAREST_INT
+
+  // lo = x - (hi + mid) = x - kf * 2^(-5)
+  double lo = fputil::multiply_add(-0x1.0p-5f, kf, x);
+
+  // hi = floor(kf * 2^(-5))
+  // exp2_hi = shift hi to the exponent field of double precision.
+  int64_t exp2_hi =
+      static_cast<int64_t>(static_cast<uint64_t>(k >> ExpBase::MID_BITS)
+                           << fputil::FPBits<double>::FRACTION_LEN);
+  // mh = 2^hi * 2^mid
+  // mh_bits = bit field of mh
+  int64_t mh_bits = ExpBase::EXP_2_MID[k & ExpBase::MID_MASK] + exp2_hi;
+  double mh = fputil::FPBits<double>(static_cast<uint64_t>(mh_bits)).get_val();
+
+  // Degree-4 polynomial approximating (2^x - 1)/x generated by Sollya with:
+  // > display = hexadecimal;
+  // > fpminimax((2^x - 1)/x, 4, [|D...|], [-2^-6, 2^-6]);
+  constexpr double COEFFS[5] = {0x1.62e42fefa39efp-1, 0x1.ebfbdff8131c4p-3,
+                                0x1.c6b08d7061695p-5, 0x1.3b2b1bee74b2ap-7,
+                                0x1.5d88091198529p-10};
+  double lo_sq = lo * lo;
+  double c1 = fputil::multiply_add(lo, COEFFS[0], 1.0);
+  double c2 = fputil::multiply_add(lo, COEFFS[2], COEFFS[1]);
+  double c3 = fputil::multiply_add(lo, COEFFS[4], COEFFS[3]);
+  double exp2_lo = fputil::polyeval(lo_sq, c1, c2, c3);
+  // 2^x - 1 = 2^(hi + mid + lo) - 1
+  //         = 2^(hi + mid) * 2^lo - 1
+  //         ~ mh * (1 + lo * P(lo)) - 1
+  //         = mh * exp2_lo - 1
+  return static_cast<float>(fputil::multiply_add(exp2_lo, mh, -1.0));
+}
+
+} // namespace math
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXP2M1F_H
diff --git a/libc/src/__support/math/exp2m1f16.h b/libc/src/__support/math/exp2m1f16.h
new file mode 100644
index 0000000000000..0424af4aa953d
--- /dev/null
+++ b/libc/src/__support/math/exp2m1f16.h
@@ -0,0 +1,180 @@
+//===-- Implementation header for exp2m1f16 ----------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXP2M1F16_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_EXP2M1F16_H
+
+#include "include/llvm-libc-macros/float16-macros.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT16
+
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/cast.h"
+#include "src/__support/FPUtil/except_value_utils.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/rounding_mode.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h"
+#include "src/__support/macros/properties/cpu_features.h"
+#include "src/__support/math/expxf16_utils.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+LIBC_INLINE static constexpr float16 exp2m1f16(float16 x) {
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+  constexpr fputil::ExceptValues<float16, 6> EXP2M1F16_EXCEPTS_LO = {{
+      // (input, RZ output, RU offset, RD offset, RN offset)
+      // x = 0x1.cf4p-13, exp2m1f16(x) = 0x1.41p-13 (RZ)
+      {0x0b3dU, 0x0904U, 1U, 0U, 1U},
+      // x = 0x1.4fcp-12, exp2m1f16(x) = 0x1.d14p-13 (RZ)
+      {0x0d3fU, 0x0b45U, 1U, 0U, 1U},
+      // x = 0x1.63p-11, exp2m1f16(x) = 0x1.ec4p-12 (RZ)
+      {0x118cU, 0x0fb1U, 1U, 0U, 0U},
+      // x = 0x1.6fp-7, exp2m1f16(x) = 0x1.fe8p-8 (RZ)
+      {0x21bcU, 0x1ffaU, 1U, 0U, 1U},
+      // x = -0x1.c6p-10, exp2m1f16(x) = -0x1.3a8p-10 (RZ)
+      {0x9718U, 0x94eaU, 0U, 1U, 0U},
+      // x = -0x1.cfcp-10, exp2m1f16(x) = -0x1.414p-10 (RZ)
+      {0x973fU, 0x9505U, 0U, 1U, 0U},
+  }};
+
+#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT
+  constexpr size_t N_EXP2M1F16_EXCEPTS_HI = 6;
+#else
+  constexpr size_t N_EXP2M1F16_EXCEPTS_HI = 7;
+#endif
+
+  constexpr fputil::ExceptValues<float16, N_EXP2M1F16_EXCEPTS_HI>
+      EXP2M1F16_EXCEPTS_HI = {{
+          // (input, RZ output, RU offset, RD offset, RN offset)
+          // x = 0x1.e58p-3, exp2m1f16(x) = 0x1.6dcp-3 (RZ)
+          {0x3396U, 0x31b7U, 1U, 0U, 0U},
+#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT
+          // x = 0x1.2e8p-2, exp2m1f16(x) = 0x1.d14p-3 (RZ)
+          {0x34baU, 0x3345U, 1U, 0U, 0U},
+#endif
+          // x = 0x1.ad8p-2, exp2m1f16(x) = 0x1.598p-2 (RZ)
+          {0x36b6U, 0x3566U, 1U, 0U, 0U},
+#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT
+          // x = 0x1.edcp-2, exp2m1f16(x) = 0x1.964p-2 (RZ)
+          {0x37b7U, 0x3659U, 1U, 0U, 1U},
+#endif
+          // x = -0x1.804p-3, exp2m1f16(x) = -0x1.f34p-4 (RZ)
+          {0xb201U, 0xafcdU, 0U, 1U, 1U},
+          // x = -0x1.f3p-3, exp2m1f16(x) = -0x1.3e4p-3 (RZ)
+          {0xb3ccU, 0xb0f9U, 0U, 1U, 0U},
+          // x = -0x1.294p-1, exp2m1f16(x) = -0x1.53p-2 (RZ)
+          {0xb8a5U, 0xb54cU, 0U, 1U, 1U},
+#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT
+          // x = -0x1.a34p-1, exp2m1f16(x) = -0x1.bb4p-2 (RZ)
+          {0xba8dU, 0xb6edU, 0U, 1U, 1U},
+#endif
+      }};
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+  using namespace math::expxf16_internal;
+  using FPBits = fputil::FPBits<float16>;
+  FPBits x_bits(x);
+
+  uint16_t x_u = x_bits.uintval();
+  uint16_t x_abs = x_u & 0x7fffU;
+
+  // When |x| <= 2^(-3), or |x| >= 11, or x is NaN.
+  if (LIBC_UNLIKELY(x_abs <= 0x3000U || x_abs >= 0x4980U)) {
+    // exp2m1(NaN) = NaN
+    if (x_bits.is_nan()) {
+      if (x_bits.is_signaling_nan()) {
+        fputil::raise_except_if_required(FE_INVALID);
+        return FPBits::quiet_nan().get_val();
+      }
+
+      return x;
+    }
+
+    // When x >= 16 (0x4c00 encodes 16.0); 11 <= x < 16 uses the general path.
+    if (x_u >= 0x4c00 && x_bits.is_pos()) {
+      // exp2m1(+inf) = +inf
+      if (x_bits.is_inf())
+        return FPBits::inf().get_val();
+
+      switch (fputil::quick_get_round()) {
+      case FE_TONEAREST:
+      case FE_UPWARD:
+        fputil::set_errno_if_required(ERANGE);
+        fputil::raise_except_if_required(FE_OVERFLOW | FE_INEXACT);
+        return FPBits::inf().get_val();
+      default:
+        return FPBits::max_normal().get_val();
+      }
+    }
+
+    // When x < -11 (NaN inputs have already returned above).
+    if (x_u > 0xc980U) {
+      // exp2m1(-inf) = -1
+      if (x_bits.is_inf())
+        return FPBits::one(Sign::NEG).get_val();
+
+      // When -12 < x < -11, round(2^x - 1, HP, RN) = -0x1.ffcp-1.
+      if (x_u < 0xca00U)
+        return fputil::round_result_slightly_down(
+            fputil::cast<float16>(-0x1.ffcp-1));
+
+      // When x <= -12, round(2^x - 1, HP, RN) = -1.
+      switch (fputil::quick_get_round()) {
+      case FE_TONEAREST:
+      case FE_DOWNWARD:
+        return FPBits::one(Sign::NEG).get_val();
+      default:
+        return fputil::cast<float16>(-0x1.ffcp-1);
+      }
+    }
+
+    // When |x| <= 2^(-3).
+    if (x_abs <= 0x3000U) {
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+      if (auto r = EXP2M1F16_EXCEPTS_LO.lookup(x_u);
+          LIBC_UNLIKELY(r.has_value()))
+        return r.value();
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+      float xf = x;
+      // Degree-5 minimax polynomial (x times the degree-4 P below) generated
+      // by Sollya with the following commands:
+      //   > display = hexadecimal;
+      //   > P = fpminimax((2^x - 1)/x, 4, [|SG...|], [-2^-3, 2^-3]);
+      //   > x * P;
+      return fputil::cast<float16>(
+          xf * fputil::polyeval(xf, 0x1.62e43p-1f, 0x1.ebfbdep-3f,
+                                0x1.c6af88p-5f, 0x1.3b45d6p-7f,
+                                0x1.641e7cp-10f));
+    }
+  }
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+  if (auto r = EXP2M1F16_EXCEPTS_HI.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
+    return r.value();
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+  // exp2(x) = exp2(hi + mid) * exp2(lo)
+  auto [exp2_hi_mid, exp2_lo] = exp2_range_reduction(x);
+  // exp2m1(x) = exp2(hi + mid) * exp2(lo) - 1
+  return fputil::cast<float16>(
+      fputil::multiply_add(exp2_hi_mid, exp2_lo, -1.0f));
+}
+
+} // namespace math
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT16
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXP2M1F16_H
diff --git a/libc/src/__support/math/expm1.h b/libc/src/__support/math/expm1.h
new file mode 100644
index 0000000000000..4bbb20ffbf7a1
--- /dev/null
+++ b/libc/src/__support/math/expm1.h
@@ -0,0 +1,518 @@
+//===-- Implementation header for expm1 -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXPM1_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_EXPM1_H
+
+#include "common_constants.h" // Lookup tables EXP_M1 and EXP_M2.
+#include "exp_constants.h"
+#include "src/__support/CPP/bit.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/double_double.h"
+#include "src/__support/FPUtil/dyadic_float.h"
+#include "src/__support/FPUtil/except_value_utils.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/rounding_mode.h"
+#include "src/__support/FPUtil/triple_double.h"
+#include "src/__support/common.h"
+#include "src/__support/integer_literals.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+namespace expm1_internal {
+
+#if ((LIBC_MATH & LIBC_MATH_SKIP_ACCURATE_PASS) != 0)
+#define LIBC_MATH_EXPM1_SKIP_ACCURATE_PASS
+#endif
+
+using fputil::DoubleDouble;
+using fputil::TripleDouble;
+using Float128 = typename fputil::DyadicFloat<128>;
+
+using LIBC_NAMESPACE::operator""_u128;
+
+// log2(e)
+static constexpr double LOG2_E = 0x1.71547652b82fep+0;
+
+// Error bounds:
+// Errors when using double precision.
+// 0x1.8p-63;
+static constexpr uint64_t ERR_D = 0x3c08000000000000;
+// Errors when using double-double precision.
+// 0x1.0p-99
+[[maybe_unused]] static constexpr uint64_t ERR_DD = 0x39c0000000000000;
+
+// -2^-12 * log(2)
+// > a = -2^-12 * log(2);
+// > b = round(a, 30, RN);
+// > c = round(a - b, 30, RN);
+// > d = round(a - b - c, D, RN);
+// Errors < 1.5 * 2^-133
+static constexpr double MLOG_2_EXP2_M12_HI = -0x1.62e42ffp-13;
+static constexpr double MLOG_2_EXP2_M12_MID = 0x1.718432a1b0e26p-47;
+static constexpr double MLOG_2_EXP2_M12_MID_30 = 0x1.718432ap-47;
+static constexpr double MLOG_2_EXP2_M12_LO = 0x1.b0e2633fe0685p-79;
+
+using namespace common_constants_internal;
+
+// Polynomial approximations with double precision:
+// Return expm1(dx) / dx ~ 1 + dx / 2 + dx^2 / 6 + dx^3 / 24.
+// For |dx| < 2^-13 + 2^-30:
+//   | output - expm1(dx) / dx | < 2^-51.
+LIBC_INLINE static double poly_approx_d(double dx) {
+  // dx^2
+  double dx2 = dx * dx;
+  // c0 = 1 + dx / 2
+  double c0 = fputil::multiply_add(dx, 0.5, 1.0);
+  // c1 = 1/6 + dx / 24
+  double c1 =
+      fputil::multiply_add(dx, 0x1.5555555555555p-5, 0x1.5555555555555p-3);
+  // p = dx^2 * c1 + c0 = 1 + dx / 2 + dx^2 / 6 + dx^3 / 24
+  double p = fputil::multiply_add(dx2, c1, c0);
+  return p;
+}
+
+// Polynomial approximation with double-double precision:
+// Return expm1(dx) / dx ~ 1 + dx / 2 + dx^2 / 6 + ... + dx^6 / 5040
+// For |dx| < 2^-13 + 2^-30:
+//   | output - expm1(dx) / dx | < 2^-101
+LIBC_INLINE static constexpr DoubleDouble
+poly_approx_dd(const DoubleDouble &dx) {
+  // Taylor coefficients 1/(k+1)!; each pair lists the small correction first.
+  constexpr DoubleDouble COEFFS[] = {
+      {0, 0x1p0},                                      // 1
+      {0, 0x1p-1},                                     // 1/2
+      {0x1.5555555555555p-57, 0x1.5555555555555p-3},   // 1/6
+      {0x1.5555555555555p-59, 0x1.5555555555555p-5},   // 1/24
+      {0x1.1111111111111p-63, 0x1.1111111111111p-7},   // 1/120
+      {-0x1.f49f49f49f49fp-65, 0x1.6c16c16c16c17p-10}, // 1/720
+      {0x1.a01a01a01a01ap-73, 0x1.a01a01a01a01ap-13},  // 1/5040
+  };
+
+  DoubleDouble p = fputil::polyeval(dx, COEFFS[0], COEFFS[1], COEFFS[2],
+                                    COEFFS[3], COEFFS[4], COEFFS[5], COEFFS[6]);
+  return p;
+}
+
+// Polynomial approximation with 128-bit precision:
+// Return (exp(dx) - 1)/dx ~ 1 + dx / 2 + dx^2 / 6 + ... + dx^6 / 5040
+// For |dx| < 2^-13 + 2^-30:
+//   | output - (exp(dx) - 1)/dx | < 2^-126.
+[[maybe_unused]] LIBC_INLINE static constexpr Float128
+poly_approx_f128(const Float128 &dx) {
+  constexpr Float128 COEFFS_128[]{
+      {Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128}, // 1.0
+      {Sign::POS, -128, 0x80000000'00000000'00000000'00000000_u128}, // 0.5
+      {Sign::POS, -130, 0xaaaaaaaa'aaaaaaaa'aaaaaaaa'aaaaaaab_u128}, // 1/6
+      {Sign::POS, -132, 0xaaaaaaaa'aaaaaaaa'aaaaaaaa'aaaaaaab_u128}, // 1/24
+      {Sign::POS, -134, 0x88888888'88888888'88888888'88888889_u128}, // 1/120
+      {Sign::POS, -137, 0xb60b60b6'0b60b60b'60b60b60'b60b60b6_u128}, // 1/720
+      {Sign::POS, -140, 0xd00d00d0'0d00d00d'00d00d00'd00d00d0_u128}, // 1/5040
+  };
+
+  Float128 p = fputil::polyeval(dx, COEFFS_128[0], COEFFS_128[1], COEFFS_128[2],
+                                COEFFS_128[3], COEFFS_128[4], COEFFS_128[5],
+                                COEFFS_128[6]);
+  return p;
+}
+
+#ifdef DEBUGDEBUG
+std::ostream &operator<<(std::ostream &OS, const Float128 &r) {
+  OS << (r.sign == Sign::NEG ? "-(" : "(") << r.mantissa.val[0] << " + "
+     << r.mantissa.val[1] << " * 2^64) * 2^" << r.exponent << "\n";
+  return OS;
+}
+
+std::ostream &operator<<(std::ostream &OS, const DoubleDouble &r) {
+  OS << std::hexfloat << "(" << r.hi << " + " << r.lo << ")"
+     << std::defaultfloat << "\n";
+  return OS;
+}
+#endif
+
+// Compute exp(x) - 1 using 128-bit precision.
+// TODO(lntue): investigate triple-double precision implementation for this
+// step.
+[[maybe_unused]] LIBC_INLINE static Float128 expm1_f128(double x, double kd,
+                                                        int idx1, int idx2) {
+  // Recalculate dx:
+  //   dx = x - k * 2^-12 * log(2)
+  double t1 = fputil::multiply_add(kd, MLOG_2_EXP2_M12_HI, x); // exact
+  double t2 = kd * MLOG_2_EXP2_M12_MID_30;                     // exact
+  double t3 = kd * MLOG_2_EXP2_M12_LO;                         // Error < 2^-133
+
+  Float128 dx = fputil::quick_add(
+      Float128(t1), fputil::quick_add(Float128(t2), Float128(t3)));
+
+  // TODO: Skip recalculating exp_mid1 and exp_mid2.
+  Float128 exp_mid1 =
+      fputil::quick_add(Float128(EXP2_MID1[idx1].hi),
+                        fputil::quick_add(Float128(EXP2_MID1[idx1].mid),
+                                          Float128(EXP2_MID1[idx1].lo)));
+
+  Float128 exp_mid2 =
+      fputil::quick_add(Float128(EXP2_MID2[idx2].hi),
+                        fputil::quick_add(Float128(EXP2_MID2[idx2].mid),
+                                          Float128(EXP2_MID2[idx2].lo)));
+
+  Float128 exp_mid = fputil::quick_mul(exp_mid1, exp_mid2);
+
+  int hi = static_cast<int>(kd) >> 12;
+  Float128 minus_one{Sign::NEG, -127 - hi,
+                     0x80000000'00000000'00000000'00000000_u128};
+
+  Float128 exp_mid_m1 = fputil::quick_add(exp_mid, minus_one);
+
+  Float128 p = poly_approx_f128(dx);
+
+  // r = exp_mid * (1 + dx * P) - 1
+  //   = (exp_mid - 1) + (dx * exp_mid) * P
+  Float128 r =
+      fputil::multiply_add(fputil::quick_mul(exp_mid, dx), p, exp_mid_m1);
+
+  r.exponent += hi;
+
+#ifdef DEBUGDEBUG
+  std::cout << "=== VERY SLOW PASS ===\n"
+            << "        kd: " << kd << "\n"
+            << "        hi: " << hi << "\n"
+            << " minus_one: " << minus_one << "        dx: " << dx
+            << "exp_mid_m1: " << exp_mid_m1 << "   exp_mid: " << exp_mid
+            << "         p: " << p << "         r: " << r << std::endl;
+#endif
+
+  return r;
+}
+
+// Compute exp(x) - 1 with double-double precision.
+LIBC_INLINE static DoubleDouble exp_double_double(double x, double kd,
+                                                  const DoubleDouble &exp_mid,
+                                                  const DoubleDouble &hi_part) {
+  // Recalculate dx:
+  //   dx = x - k * 2^-12 * log(2)
+  double t1 = fputil::multiply_add(kd, MLOG_2_EXP2_M12_HI, x); // exact
+  double t2 = kd * MLOG_2_EXP2_M12_MID_30;                     // exact
+  double t3 = kd * MLOG_2_EXP2_M12_LO;                         // Error < 2^-130
+
+  DoubleDouble dx = fputil::exact_add(t1, t2);
+  dx.lo += t3;
+
+  // Degree-6 Taylor polynomial approximation in double-double precision.
+  // | p - expm1(dx) / dx | < 2^-100.
+  DoubleDouble p = poly_approx_dd(dx);
+
+  // Error bounds: 2^-99.
+  DoubleDouble r =
+      fputil::multiply_add(fputil::quick_mult(exp_mid, dx), p, hi_part);
+
+#ifdef DEBUGDEBUG
+  std::cout << "=== SLOW PASS ===\n"
+            << "   dx: " << dx << "    p: " << p << "    r: " << r << std::endl;
+#endif
+
+  return r;
+}
+
+// Check for exceptional cases when
+// |x| <= 2^-53 or x < log(2^-54) or x >= 0x1.62e42fefa39fp+9
+LIBC_INLINE static constexpr double set_exceptional(double x) {
+  using FPBits = typename fputil::FPBits<double>;
+  FPBits xbits(x);
+
+  uint64_t x_u = xbits.uintval();
+  uint64_t x_abs = xbits.abs().uintval();
+
+  // |x| <= 2^-53.
+  if (x_abs <= 0x3ca0'0000'0000'0000ULL) {
+    // expm1(x) ~ x.
+
+    if (LIBC_UNLIKELY(x_abs <= 0x0370'0000'0000'0000ULL)) {
+      if (LIBC_UNLIKELY(x_abs == 0))
+        return x;
+      // |x| <= 2^-968, need to scale up a bit before rounding, then scale it
+      // back down.
+      return 0x1.0p-200 * fputil::multiply_add(x, 0x1.0p+200, 0x1.0p-1022);
+    }
+
+    // 2^-968 < |x| <= 2^-53.
+    return fputil::round_result_slightly_up(x);
+  }
+
+  // x < log(2^-54) || x >= 0x1.62e42fefa39fp+9 or inf/nan.
+
+  // x < log(2^-54) or -inf/nan
+  if (x_u >= 0xc042'b708'8723'20e2ULL) {
+    // expm1(-Inf) = -1
+    if (xbits.is_inf())
+      return -1.0;
+
+    // expm1(nan) = nan
+    if (xbits.is_nan())
+      return x;
+
+    return fputil::round_result_slightly_up(-1.0);
+  }
+
+  // x >= round(log(MAX_NORMAL), D, RU) = 0x1.62e42fefa39fp+9 or +inf/nan
+  // x is finite
+  if (x_u < 0x7ff0'0000'0000'0000ULL) {
+    int rounding = fputil::quick_get_round();
+    if (rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO)
+      return FPBits::max_normal().get_val();
+
+    fputil::set_errno_if_required(ERANGE);
+    fputil::raise_except_if_required(FE_OVERFLOW);
+  }
+  // x is +inf or nan
+  return x + FPBits::inf().get_val();
+}
+
+} // namespace expm1_internal
+
+LIBC_INLINE static constexpr double expm1(double x) {
+  using namespace expm1_internal;
+  // Computes e^x - 1 in up to three passes: double, double-double, Float128.
+  using FPBits = typename fputil::FPBits<double>;
+
+  FPBits xbits(x);
+
+  bool x_is_neg = xbits.is_neg();
+  uint64_t x_u = xbits.uintval();
+
+  // Upper bound: max normal number = 2^1023 * (2 - 2^-52)
+  // > round(log (2^1023 ( 2 - 2^-52 )), D, RU) = 0x1.62e42fefa39fp+9
+  // > round(log (2^1023 ( 2 - 2^-52 )), D, RD) = 0x1.62e42fefa39efp+9
+  // > round(log (2^1023 ( 2 - 2^-52 )), D, RN) = 0x1.62e42fefa39efp+9
+  // > round(exp(0x1.62e42fefa39fp+9), D, RN) = infty
+
+  // Lower bound: log(2^-54) = -0x1.2b708872320e2p5
+  // > round(log(2^-54), D, RN) = -0x1.2b708872320e2p5
+
+  // x < log(2^-54) or x >= 0x1.62e42fefa39fp+9 or |x| <= 2^-53.
+
+  if (LIBC_UNLIKELY(x_u >= 0xc042b708872320e2 ||
+                    (x_u <= 0xbca0000000000000 && x_u >= 0x40862e42fefa39f0) ||
+                    x_u <= 0x3ca0000000000000)) {
+    return set_exceptional(x);
+  }
+
+  // Now log(2^-54) <= x < -2^-53 or 2^-53 < x < log(2^1023 * (2 - 2^-52))
+
+  // Range reduction:
+  // Let x = log(2) * (hi + mid1 + mid2) + lo
+  // in which:
+  //   hi is an integer
+  //   mid1 * 2^6 is an integer
+  //   mid2 * 2^12 is an integer
+  // then:
+  //   exp(x) = 2^hi * 2^(mid1) * 2^(mid2) * exp(lo).
+  // With this formula:
+  //   - multiplying by 2^hi is exact and cheap, simply by adding the exponent
+  //     field.
+  //   - 2^(mid1) and 2^(mid2) are stored in 2 x 64-element tables.
+  //   - exp(lo) ~ 1 + lo + a0 * lo^2 + ...
+  //
+  // They can be defined by:
+  //   hi + mid1 + mid2 = 2^(-12) * round(2^12 * log_2(e) * x)
+  // If we store L2E = round(log2(e), D, RN), then:
+  //   log2(e) - L2E ~ 1.5 * 2^(-56)
+  // So the errors when computing in double precision is:
+  //   | x * 2^12 * log_2(e) - D(x * 2^12 * L2E) | <=
+  //  <= | x * 2^12 * log_2(e) - x * 2^12 * L2E | +
+  //     + | x * 2^12 * L2E - D(x * 2^12 * L2E) |
+  //  <= 2^12 * ( |x| * 1.5 * 2^-56 + eps(x))  for RN
+  //     2^12 * ( |x| * 1.5 * 2^-56 + 2*eps(x)) for other rounding modes.
+  // So if:
+  //   hi + mid1 + mid2 = 2^(-12) * round(x * 2^12 * L2E) is computed entirely
+  // in double precision, the reduced argument:
+  //   lo = x - log(2) * (hi + mid1 + mid2) is bounded by:
+  //   |lo| <= 2^-13 + (|x| * 1.5 * 2^-56 + 2*eps(x))
+  //         < 2^-13 + (1.5 * 2^9 * 1.5 * 2^-56 + 2*2^(9 - 52))
+  //         < 2^-13 + 2^-41
+  //
+
+  // The following trick computes the round(x * L2E) more efficiently
+  // than using the rounding instructions, with the tradeoff for less accuracy,
+  // and hence a slightly larger range for the reduced argument `lo`.
+  //
+  // To be precise, since |x| < |log(2^-1075)| < 1.5 * 2^9,
+  //   |x * 2^12 * L2E| < 1.5 * 2^9 * 1.5 < 2^23,
+  // So we can fit the rounded result round(x * 2^12 * L2E) in int32_t.
+  // Thus, the goal is to be able to use an additional addition and fixed width
+  // shift to get an int32_t representing round(x * 2^12 * L2E).
+  //
+  // Assuming int32_t using 2-complement representation, since the mantissa part
+  // of a double precision is unsigned with the leading bit hidden, if we add an
+  // extra constant C = 2^e1 + 2^e2 with e1 > e2 >= 25 to the product, the
+  // part that are < 2^e2 in resulted mantissa of (x*2^12*L2E + C) can be
+  // considered as a proper 2-complement representations of x*2^12*L2E.
+  //
+  // One small problem with this approach is that the sum (x*2^12*L2E + C) in
+  // double precision is rounded to the least significant bit of the dominant
+  // factor C.  In order to minimize the rounding errors from this addition, we
+  // want to minimize e1.  Another constraint that we want is that after
+  // shifting the mantissa so that the least significant bit of int32_t
+  // corresponds to the unit bit of (x*2^12*L2E), the sign is correct without
+  // any adjustment.  So combining these 2 requirements, we can choose
+  //   C = 2^33 + 2^32, so that the sign bit corresponds to 2^31 bit, and hence
+  // after right shifting the mantissa, the resulting int32_t has correct sign.
+  // With this choice of C, the number of mantissa bits we need to shift to the
+  // right is: 52 - 33 = 19.
+  //
+  // Moreover, since the integer right shifts are equivalent to rounding down,
+  // we can add an extra 0.5 so that it will become round-to-nearest, tie-to-
+  // +infinity.  So in particular, we can compute:
+  //   hmm = x * 2^12 * L2E + C,
+  // where C = 2^33 + 2^32 + 2^-1, then if
+  //   k = int32_t(lower 51 bits of double(x * 2^12 * L2E + C) >> 19),
+  // the reduced argument:
+  //   lo = x - log(2) * 2^-12 * k is bounded by:
+  //   |lo| <= 2^-13 + 2^-41 + 2^-12*2^-19
+  //         = 2^-13 + 2^-31 + 2^-41.
+  //
+  // Finally, notice that k only uses the mantissa of x * 2^12 * L2E, so the
+  // exponent 2^12 is not needed.  So we can simply define
+  //   C = 2^(33 - 12) + 2^(32 - 12) + 2^(-1 - 12), and
+  //   k = int32_t(lower 51 bits of double(x * L2E + C) >> 19).
+
+  // Rounding errors <= 2^-31 + 2^-41.
+  double tmp = fputil::multiply_add(x, LOG2_E, 0x1.8000'0000'4p21);
+  int k = static_cast<int>(cpp::bit_cast<uint64_t>(tmp) >> 19);
+  double kd = static_cast<double>(k);
+
+  uint32_t idx1 = (k >> 6) & 0x3f;
+  uint32_t idx2 = k & 0x3f;
+  int hi = k >> 12;
+
+  DoubleDouble exp_mid1{EXP2_MID1[idx1].mid, EXP2_MID1[idx1].hi};
+  DoubleDouble exp_mid2{EXP2_MID2[idx2].mid, EXP2_MID2[idx2].hi};
+
+  DoubleDouble exp_mid = fputil::quick_mult(exp_mid1, exp_mid2);
+
+  // -2^(-hi)
+  double one_scaled =
+      FPBits::create_value(Sign::NEG, FPBits::EXP_BIAS - hi, 0).get_val();
+
+  // 2^(mid1 + mid2) - 2^(-hi)
+  DoubleDouble hi_part = x_is_neg ? fputil::exact_add(one_scaled, exp_mid.hi)
+                                  : fputil::exact_add(exp_mid.hi, one_scaled);
+
+  hi_part.lo += exp_mid.lo;
+
+  // |x - (hi + mid1 + mid2) * log(2) - dx| < 2^11 * eps(M_LOG_2_EXP2_M12.lo)
+  //                                        = 2^11 * 2^-13 * 2^-52
+  //                                        = 2^-54.
+  // |dx| < 2^-13 + 2^-30.
+  double lo_h = fputil::multiply_add(kd, MLOG_2_EXP2_M12_HI, x); // exact
+  double dx = fputil::multiply_add(kd, MLOG_2_EXP2_M12_MID, lo_h);
+
+  // We use the degree-4 Taylor polynomial to approximate exp(lo):
+  //   exp(lo) ~ 1 + lo + lo^2 / 2 + lo^3 / 6 + lo^4 / 24 = 1 + lo * P(lo)
+  // So that the errors are bounded by:
+  //   |P(lo) - expm1(lo)/lo| < |lo|^4 / 64 < 2^(-13 * 4) / 64 = 2^-58
+  // Let P_ be an evaluation of P where all intermediate computations are in
+  // double precision.  Using either Horner's or Estrin's schemes, the evaluated
+  // errors can be bounded by:
+  //      |P_(dx) - P(dx)| < 2^-51
+  //   => |dx * P_(dx) - expm1(lo) | < 1.5 * 2^-64
+  //   => 2^(mid1 + mid2) * |dx * P_(dx) - expm1(lo)| < 1.5 * 2^-63.
+  // Since we approximate
+  //   2^(mid1 + mid2) ~ exp_mid.hi + exp_mid.lo,
+  // We use the expression:
+  //    (exp_mid.hi + exp_mid.lo) * (1 + dx * P_(dx)) ~
+  //  ~ exp_mid.hi + (exp_mid.hi * dx * P_(dx) + exp_mid.lo)
+  // with errors bounded by 1.5 * 2^-63.
+
+  // Finally, we have the following approximation formula:
+  //   expm1(x) = 2^hi * 2^(mid1 + mid2) * exp(lo) - 1
+  //            = 2^hi * ( 2^(mid1 + mid2) * exp(lo) - 2^(-hi) )
+  //            ~ 2^hi * ( (exp_mid.hi - 2^-hi) +
+  //                       + (exp_mid.hi * dx * P_(dx) + exp_mid.lo))
+
+  double mid_lo = dx * exp_mid.hi;
+
+  // Approximate expm1(dx)/dx ~ 1 + dx / 2 + dx^2 / 6 + dx^3 / 24.
+  double p = poly_approx_d(dx);
+
+  double lo = fputil::multiply_add(p, mid_lo, hi_part.lo);
+
+  // TODO: The following line leaks encoding abstraction. Use FPBits methods
+  // instead.
+  uint64_t err = x_is_neg ? (static_cast<uint64_t>(-hi) << 52) : 0;
+
+  double err_d = cpp::bit_cast<double>(ERR_D + err);
+
+  double upper = hi_part.hi + (lo + err_d);
+  double lower = hi_part.hi + (lo - err_d);
+
+#ifdef DEBUGDEBUG
+  std::cout << "=== FAST PASS ===\n"
+            << "      x: " << std::hexfloat << x << std::defaultfloat << "\n"
+            << "      k: " << k << "\n"
+            << "   idx1: " << idx1 << "\n"
+            << "   idx2: " << idx2 << "\n"
+            << "     hi: " << hi << "\n"
+            << "     dx: " << std::hexfloat << dx << std::defaultfloat << "\n"
+            << "exp_mid: " << exp_mid << "hi_part: " << hi_part
+            << " mid_lo: " << std::hexfloat << mid_lo << std::defaultfloat
+            << "\n"
+            << "      p: " << std::hexfloat << p << std::defaultfloat << "\n"
+            << "     lo: " << std::hexfloat << lo << std::defaultfloat << "\n"
+            << "  upper: " << std::hexfloat << upper << std::defaultfloat
+            << "\n"
+            << "  lower: " << std::hexfloat << lower << std::defaultfloat
+            << "\n"
+            << std::endl;
+#endif
+
+  if (LIBC_LIKELY(upper == lower)) {
+    // to multiply by 2^hi, a fast way is to simply add hi to the exponent
+    // field.
+    int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
+    double r = cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(upper));
+    return r;
+  }
+
+  // Use double-double
+  DoubleDouble r_dd = exp_double_double(x, kd, exp_mid, hi_part);
+
+#ifdef LIBC_MATH_EXPM1_SKIP_ACCURATE_PASS
+  int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
+  double r =
+      cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(r_dd.hi + r_dd.lo));
+  return r;
+#else
+  double err_dd = cpp::bit_cast<double>(ERR_DD + err);
+
+  double upper_dd = r_dd.hi + (r_dd.lo + err_dd);
+  double lower_dd = r_dd.hi + (r_dd.lo - err_dd);
+
+  if (LIBC_LIKELY(upper_dd == lower_dd)) {
+    int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
+    double r = cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(upper_dd));
+    return r;
+  }
+
+  // Use 128-bit precision
+  Float128 r_f128 = expm1_f128(x, kd, idx1, idx2);
+
+  return static_cast<double>(r_f128);
+#endif // LIBC_MATH_EXPM1_SKIP_ACCURATE_PASS
+}
+
+} // namespace math
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXPM1_H
diff --git a/libc/src/__support/math/expm1f.h b/libc/src/__support/math/expm1f.h
new file mode 100644
index 0000000000000..43e79ae3112dc
--- /dev/null
+++ b/libc/src/__support/math/expm1f.h
@@ -0,0 +1,182 @@
+//===-- Implementation header for expm1f ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXPM1F_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_EXPM1F_H
+
+#include "common_constants.h" // Lookup tables EXP_M1 and EXP_M2.
+#include "src/__support/FPUtil/BasicOperations.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FMA.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/FPUtil/rounding_mode.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h"            // LIBC_UNLIKELY
+#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+// Computes e^x - 1 for a float argument; each input range is handled below.
+LIBC_INLINE static constexpr float expm1f(float x) {
+  using namespace common_constants_internal;
+  using FPBits = typename fputil::FPBits<float>;
+  FPBits xbits(x);
+
+  uint32_t x_u = xbits.uintval();
+  uint32_t x_abs = x_u & 0x7fff'ffffU;
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+  // Exceptional value
+  if (LIBC_UNLIKELY(x_u == 0x3e35'bec5U)) { // x = 0x1.6b7d8ap-3f
+    int round_mode = fputil::quick_get_round();
+    if (round_mode == FE_TONEAREST || round_mode == FE_UPWARD)
+      return 0x1.8dbe64p-3f;
+    return 0x1.8dbe62p-3f;
+  }
+#if !defined(LIBC_TARGET_CPU_HAS_FMA_DOUBLE)
+  if (LIBC_UNLIKELY(x_u == 0xbdc1'c6cbU)) { // x = -0x1.838d96p-4f
+    int round_mode = fputil::quick_get_round();
+    if (round_mode == FE_TONEAREST || round_mode == FE_DOWNWARD)
+      return -0x1.71c884p-4f;
+    return -0x1.71c882p-4f;
+  }
+#endif // !LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+  // When |x| > 25*log(2), or nan
+  if (LIBC_UNLIKELY(x_abs >= 0x418a'a123U)) {
+    // x < log(2^-25)
+    if (xbits.is_neg()) {
+      // expm1(-Inf) = -1
+      if (xbits.is_inf())
+        return -1.0f;
+      // expm1(nan) = nan
+      if (xbits.is_nan())
+        return x;
+      int round_mode = fputil::quick_get_round();
+      if (round_mode == FE_UPWARD || round_mode == FE_TOWARDZERO)
+        return -0x1.ffff'fep-1f; // -1.0f + 0x1.0p-24f
+      return -1.0f;
+    } else {
+      // x >= 89 or nan
+      if (xbits.uintval() >= 0x42b2'0000) {
+        if (xbits.uintval() < 0x7f80'0000U) {
+          int rounding = fputil::quick_get_round();
+          if (rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO)
+            return FPBits::max_normal().get_val();
+
+          fputil::set_errno_if_required(ERANGE);
+          fputil::raise_except_if_required(FE_OVERFLOW);
+        }
+        return x + FPBits::inf().get_val();
+      }
+    }
+  }
+
+  // |x| < 2^-4
+  if (x_abs < 0x3d80'0000U) {
+    // |x| < 2^-25
+    if (x_abs < 0x3300'0000U) {
+      // x = -0.0f
+      if (LIBC_UNLIKELY(xbits.uintval() == 0x8000'0000U))
+        return x;
+      // When |x| < 2^-25, the relative error of the approximation e^x - 1 ~ x
+      // is:
+      //   |(e^x - 1) - x| / |e^x - 1| < |x^2| / |x|
+      //                               = |x|
+      //                               < 2^-25
+      //                               < epsilon(1)/2.
+      // So the correctly rounded values of expm1(x) are:
+      //   = x + eps(x) if rounding mode = FE_UPWARD,
+      //                   or (rounding mode = FE_TOWARDZERO and x is
+      //                   negative),
+      //   = x otherwise.
+      // To simplify the rounding decision and make it more efficient, we use
+      //   fma(x, x, x) ~ x + x^2 instead.
+      // Note: to use the formula x + x^2 to decide the correct rounding, we
+      // do need fma(x, x, x) to prevent underflow caused by x*x when |x| <
+      // 2^-76. For targets without FMA instructions, we simply use double for
+      // intermediate results as it is more efficient than using an emulated
+      // version of FMA.
+#if defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT)
+      return fputil::multiply_add(x, x, x);
+#else
+      double xd = x;
+      return static_cast<float>(fputil::multiply_add(xd, xd, xd));
+#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT
+    }
+
+    constexpr double COEFFS[] = {0x1p-1,
+                                 0x1.55555555557ddp-3,
+                                 0x1.55555555552fap-5,
+                                 0x1.111110fcd58b7p-7,
+                                 0x1.6c16c1717660bp-10,
+                                 0x1.a0241f0006d62p-13,
+                                 0x1.a01e3f8d3c06p-16};
+
+    // 2^-25 <= |x| < 2^-4
+    double xd = static_cast<double>(x);
+    double xsq = xd * xd;
+    // Degree-8 minimax polynomial generated by Sollya with:
+    // > display = hexadecimal;
+    // > P = fpminimax((expm1(x) - x)/x^2, 6, [|D...|], [-2^-4, 2^-4]);
+
+    double c0 = fputil::multiply_add(xd, COEFFS[1], COEFFS[0]);
+    double c1 = fputil::multiply_add(xd, COEFFS[3], COEFFS[2]);
+    double c2 = fputil::multiply_add(xd, COEFFS[5], COEFFS[4]);
+
+    double r = fputil::polyeval(xsq, c0, c1, c2, COEFFS[6]);
+    return static_cast<float>(fputil::multiply_add(r, xsq, xd));
+  }
+
+  // For -18 < x < 89, to compute expm1(x), we perform the following range
+  // reduction: find hi, mid, lo such that:
+  //   x = hi + mid + lo, in which
+  //     hi is an integer,
+  //     mid * 2^7 is an integer
+  //     -2^(-8) <= lo < 2^-8.
+  // In particular,
+  //   hi + mid = round(x * 2^7) * 2^(-7).
+  // Then,
+  //   expm1(x) = exp(hi + mid + lo) - 1 = exp(hi) * exp(mid) * exp(lo) - 1.
+  // We store exp(hi) and exp(mid) in the lookup tables EXP_M1 and EXP_M2
+  // respectively.  exp(lo) is computed using a degree-4 minimax polynomial
+  // generated by Sollya.
+
+  // x_hi = hi + mid.
+  float kf = fputil::nearest_integer(x * 0x1.0p7f);
+  int x_hi = static_cast<int>(kf);
+  // Subtract (hi + mid) from x to get lo.
+  double xd = static_cast<double>(fputil::multiply_add(kf, -0x1.0p-7f, x));
+  x_hi += 104 << 7;
+  // hi = x_hi >> 7
+  double exp_hi = EXP_M1[x_hi >> 7];
+  // mid = x_hi & 0x0000'007fU;
+  double exp_mid = EXP_M2[x_hi & 0x7f];
+  double exp_hi_mid = exp_hi * exp_mid;
+  // Degree-4 minimax polynomial generated by Sollya with the following
+  // commands:
+  //   > display = hexadecimal;
+  //   > Q = fpminimax(expm1(x)/x, 3, [|D...|], [-2^-8, 2^-8]);
+  //   > Q;
+  double exp_lo =
+      fputil::polyeval(xd, 0x1.0p0, 0x1.ffffffffff777p-1, 0x1.000000000071cp-1,
+                       0x1.555566668e5e7p-3, 0x1.55555555ef243p-5);
+  return static_cast<float>(fputil::multiply_add(exp_hi_mid, exp_lo, -1.0));
+}
+
+} // namespace math
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXPM1F_H
diff --git a/libc/src/__support/math/expm1f16.h b/libc/src/__support/math/expm1f16.h
new file mode 100644
index 0000000000000..79547b62b0892
--- /dev/null
+++ b/libc/src/__support/math/expm1f16.h
@@ -0,0 +1,153 @@
+//===-- Implementation header for expm1f16 ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXPM1F16_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_EXPM1F16_H
+
+#include "include/llvm-libc-macros/float16-macros.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT16
+
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/cast.h"
+#include "src/__support/FPUtil/except_value_utils.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/rounding_mode.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h"
+#include "src/__support/math/expxf16_utils.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+// Computes e^x - 1 for a float16 argument; special ranges are handled first.
+LIBC_INLINE static constexpr float16 expm1f16(float16 x) {
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+  constexpr fputil::ExceptValues<float16, 1> EXPM1F16_EXCEPTS_LO = {{
+      // (input, RZ output, RU offset, RD offset, RN offset)
+      // x = 0x1.564p-5, expm1f16(x) = 0x1.5d4p-5 (RZ)
+      {0x2959U, 0x2975U, 1U, 0U, 1U},
+  }};
+
+#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT
+  constexpr size_t N_EXPM1F16_EXCEPTS_HI = 2;
+#else
+  constexpr size_t N_EXPM1F16_EXCEPTS_HI = 3;
+#endif
+
+  constexpr fputil::ExceptValues<float16, N_EXPM1F16_EXCEPTS_HI>
+      EXPM1F16_EXCEPTS_HI = {{
+          // (input, RZ output, RU offset, RD offset, RN offset)
+          // x = 0x1.c34p+0, expm1f16(x) = 0x1.34cp+2 (RZ)
+          {0x3f0dU, 0x44d3U, 1U, 0U, 1U},
+          // x = -0x1.e28p-3, expm1f16(x) = -0x1.adcp-3 (RZ)
+          {0xb38aU, 0xb2b7U, 0U, 1U, 1U},
+#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT
+          // x = 0x1.a08p-3, expm1f16(x) = 0x1.cdcp-3 (RZ)
+          {0x3282U, 0x3337U, 1U, 0U, 0U},
+#endif
+      }};
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+  using namespace math::expxf16_internal;
+  using FPBits = fputil::FPBits<float16>;
+  FPBits x_bits(x);
+
+  uint16_t x_u = x_bits.uintval();
+  uint16_t x_abs = x_u & 0x7fffU;
+
+  // When |x| <= 2^(-3), or |x| >= 11 * log(2), or x is NaN.
+  if (LIBC_UNLIKELY(x_abs <= 0x3000U || x_abs >= 0x47a0U)) {
+    // expm1(NaN) = NaN
+    if (x_bits.is_nan()) {
+      if (x_bits.is_signaling_nan()) {
+        fputil::raise_except_if_required(FE_INVALID);
+        return FPBits::quiet_nan().get_val();
+      }
+
+      return x;
+    }
+
+    // expm1(+/-0) = +/-0
+    if (x_abs == 0)
+      return x;
+
+    // When x >= 16 * log(2).
+    if (x_bits.is_pos() && x_abs >= 0x498cU) {
+      // expm1(+inf) = +inf
+      if (x_bits.is_inf())
+        return FPBits::inf().get_val();
+
+      switch (fputil::quick_get_round()) {
+      case FE_TONEAREST:
+      case FE_UPWARD:
+        fputil::set_errno_if_required(ERANGE);
+        fputil::raise_except_if_required(FE_OVERFLOW | FE_INEXACT);
+        return FPBits::inf().get_val();
+      default:
+        return FPBits::max_normal().get_val();
+      }
+    }
+
+    // When x <= -11 * log(2).
+    if (x_u >= 0xc7a0U) {
+      // expm1(-inf) = -1
+      if (x_bits.is_inf())
+        return FPBits::one(Sign::NEG).get_val();
+
+      // When x < -0x1.0ap+3, round(expm1(x), HP, RN) = -1.
+      if (x_u > 0xc828U)
+        return fputil::round_result_slightly_up(
+            FPBits::one(Sign::NEG).get_val());
+      // When x >= -0x1.0ap+3, round(expm1(x), HP, RN) = -0x1.ffcp-1.
+      return fputil::round_result_slightly_down(
+          fputil::cast<float16>(-0x1.ffcp-1));
+    }
+
+    // When 0 < |x| <= 2^(-3).
+    if (x_abs <= 0x3000U && !x_bits.is_zero()) {
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+      if (auto r = EXPM1F16_EXCEPTS_LO.lookup(x_u);
+          LIBC_UNLIKELY(r.has_value()))
+        return r.value();
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+      float xf = x;
+      // Degree-5 minimax polynomial generated by Sollya with the following
+      // commands:
+      //   > display = hexadecimal;
+      //   > P = fpminimax(expm1(x)/x, 4, [|SG...|], [-2^-3, 2^-3]);
+      //   > x * P;
+      return fputil::cast<float16>(
+          xf * fputil::polyeval(xf, 0x1p+0f, 0x1.fffff8p-2f, 0x1.555556p-3f,
+                                0x1.55905ep-5f, 0x1.1124c2p-7f));
+    }
+  }
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+  if (auto r = EXPM1F16_EXCEPTS_HI.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
+    return r.value();
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+  // exp(x) = exp(hi + mid) * exp(lo)
+  auto [exp_hi_mid, exp_lo] = exp_range_reduction(x);
+  // expm1(x) = exp(hi + mid) * exp(lo) - 1
+  return fputil::cast<float16>(fputil::multiply_add(exp_hi_mid, exp_lo, -1.0f));
+}
+
+} // namespace math
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT16
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXPM1F16_H
diff --git a/libc/src/__support/math/fma.h b/libc/src/__support/math/fma.h
new file mode 100644
index 0000000000000..d996610167a19
--- /dev/null
+++ b/libc/src/__support/math/fma.h
@@ -0,0 +1,27 @@
+//===-- Implementation header for fma ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_FMA_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_FMA_H
+
+#include "src/__support/FPUtil/FMA.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+// Double-precision fma: forwards (x, y, z) to fputil::fma<double>.
+LIBC_INLINE static double fma(double x, double y, double z) {
+  return fputil::fma<double>(x, y, z);
+}
+
+} // namespace math
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_FMA_H
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 0754b5e0b08e1..7103c6947eba0 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -1488,17 +1488,7 @@ add_entrypoint_object(
   HDRS
     ../exp2m1f.h
   DEPENDS
-    libc.src.errno.errno
-    libc.src.__support.common
-    libc.src.__support.FPUtil.except_value_utils
-    libc.src.__support.FPUtil.fenv_impl
-    libc.src.__support.FPUtil.fp_bits
-    libc.src.__support.FPUtil.multiply_add
-    libc.src.__support.FPUtil.polyeval
-    libc.src.__support.FPUtil.rounding_mode
-    libc.src.__support.macros.optimization
-    libc.src.__support.macros.properties.cpu_features
-    libc.src.__support.math.exp10f_utils
+    libc.src.__support.math.exp2m1f
 )
 
 add_entrypoint_object(
@@ -1508,19 +1498,7 @@ add_entrypoint_object(
   HDRS
     ../exp2m1f16.h
   DEPENDS
-    libc.hdr.errno_macros
-    libc.hdr.fenv_macros
-    libc.src.__support.common
-    libc.src.__support.FPUtil.cast
-    libc.src.__support.FPUtil.except_value_utils
-    libc.src.__support.FPUtil.fenv_impl
-    libc.src.__support.FPUtil.fp_bits
-    libc.src.__support.FPUtil.multiply_add
-    libc.src.__support.FPUtil.polyeval
-    libc.src.__support.FPUtil.rounding_mode
-    libc.src.__support.macros.optimization
-    libc.src.__support.macros.properties.cpu_features
-    libc.src.__support.math.expxf16_utils
+    libc.src.__support.math.exp2m1f16
 )
 
 add_entrypoint_object(
@@ -1583,18 +1561,7 @@ add_entrypoint_object(
   HDRS
     ../expm1.h
   DEPENDS
-    libc.src.__support.CPP.bit
-    libc.src.__support.FPUtil.dyadic_float
-    libc.src.__support.FPUtil.fenv_impl
-    libc.src.__support.FPUtil.fp_bits
-    libc.src.__support.FPUtil.multiply_add
-    libc.src.__support.FPUtil.polyeval
-    libc.src.__support.FPUtil.rounding_mode
-    libc.src.__support.FPUtil.triple_double
-    libc.src.__support.integer_literals
-    libc.src.__support.macros.optimization
-    libc.src.__support.math.common_constants
-    libc.src.errno.errno
+    libc.src.__support.math.expm1
 )
 
 add_entrypoint_object(
@@ -1604,16 +1571,7 @@ add_entrypoint_object(
   HDRS
     ../expm1f.h
   DEPENDS
-    libc.src.__support.FPUtil.basic_operations
-    libc.src.__support.FPUtil.fenv_impl
-    libc.src.__support.FPUtil.fp_bits
-    libc.src.__support.FPUtil.multiply_add
-    libc.src.__support.FPUtil.nearest_integer
-    libc.src.__support.FPUtil.polyeval
-    libc.src.__support.FPUtil.rounding_mode
-    libc.src.__support.macros.optimization
-    libc.src.__support.math.common_constants
-    libc.src.errno.errno
+    libc.src.__support.math.expm1f
 )
 
 add_entrypoint_object(
@@ -1623,17 +1581,7 @@ add_entrypoint_object(
   HDRS
     ../expm1f16.h
   DEPENDS
-    libc.hdr.errno_macros
-    libc.hdr.fenv_macros
-    libc.src.__support.FPUtil.cast
-    libc.src.__support.FPUtil.except_value_utils
-    libc.src.__support.FPUtil.fenv_impl
-    libc.src.__support.FPUtil.fp_bits
-    libc.src.__support.FPUtil.multiply_add
-    libc.src.__support.FPUtil.polyeval
-    libc.src.__support.FPUtil.rounding_mode
-    libc.src.__support.macros.optimization
-    libc.src.__support.math.expxf16_utils
+    libc.src.__support.math.expm1f16
 )
 
 add_entrypoint_object(
@@ -4774,7 +4722,7 @@ add_entrypoint_object(
   HDRS
     ../fma.h
   DEPENDS
-    libc.src.__support.FPUtil.fma
+    libc.src.__support.math.fma
 )
 
 add_entrypoint_object(
diff --git a/libc/src/math/generic/exp2m1f.cpp b/libc/src/math/generic/exp2m1f.cpp
index 16244edb4c583..14d026f1cb84e 100644
--- a/libc/src/math/generic/exp2m1f.cpp
+++ b/libc/src/math/generic/exp2m1f.cpp
@@ -7,183 +7,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/math/exp2m1f.h"
-#include "src/__support/FPUtil/FEnvImpl.h"
-#include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/FPUtil/PolyEval.h"
-#include "src/__support/FPUtil/except_value_utils.h"
-#include "src/__support/FPUtil/multiply_add.h"
-#include "src/__support/FPUtil/rounding_mode.h"
-#include "src/__support/common.h"
-#include "src/__support/libc_errno.h"
-#include "src/__support/macros/config.h"
-#include "src/__support/macros/optimization.h"
-#include "src/__support/macros/properties/cpu_features.h"
-#include "src/__support/math/exp10f_utils.h"
+#include "src/__support/math/exp2m1f.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-static constexpr size_t N_EXCEPTS_LO = 8;
-
-static constexpr fputil::ExceptValues<float, N_EXCEPTS_LO> EXP2M1F_EXCEPTS_LO =
-    {{
-        // (input, RZ output, RU offset, RD offset, RN offset)
-        // x = 0x1.36dc8ep-36, exp2m1f(x) = 0x1.aef212p-37 (RZ)
-        {0x2d9b'6e47U, 0x2d57'7909U, 1U, 0U, 0U},
-        // x = 0x1.224936p-19, exp2m1f(x) = 0x1.926c0ep-20 (RZ)
-        {0x3611'249bU, 0x35c9'3607U, 1U, 0U, 1U},
-        // x = 0x1.d16d2p-20, exp2m1f(x) = 0x1.429becp-20 (RZ)
-        {0x35e8'b690U, 0x35a1'4df6U, 1U, 0U, 1U},
-        // x = 0x1.17949ep-14, exp2m1f(x) = 0x1.8397p-15 (RZ)
-        {0x388b'ca4fU, 0x3841'cb80U, 1U, 0U, 1U},
-        // x = -0x1.9c3e1ep-38, exp2m1f(x) = -0x1.1dbeacp-38 (RZ)
-        {0xacce'1f0fU, 0xac8e'df56U, 0U, 1U, 0U},
-        // x = -0x1.4d89b4p-32, exp2m1f(x) = -0x1.ce61b6p-33 (RZ)
-        {0xafa6'c4daU, 0xaf67'30dbU, 0U, 1U, 1U},
-        // x = -0x1.a6eac4p-10, exp2m1f(x) = -0x1.24fadap-10 (RZ)
-        {0xbad3'7562U, 0xba92'7d6dU, 0U, 1U, 1U},
-        // x = -0x1.e7526ep-6, exp2m1f(x) = -0x1.4e53dep-6 (RZ)
-        {0xbcf3'a937U, 0xbca7'29efU, 0U, 1U, 1U},
-    }};
-
-static constexpr size_t N_EXCEPTS_HI = 3;
-
-static constexpr fputil::ExceptValues<float, N_EXCEPTS_HI> EXP2M1F_EXCEPTS_HI =
-    {{
-        // (input, RZ output, RU offset, RD offset, RN offset)
-        // x = 0x1.16a972p-1, exp2m1f(x) = 0x1.d545b2p-2 (RZ)
-        {0x3f0b'54b9U, 0x3eea'a2d9U, 1U, 0U, 0U},
-        // x = -0x1.9f12acp-5, exp2m1f(x) = -0x1.1ab68cp-5 (RZ)
-        {0xbd4f'8956U, 0xbd0d'5b46U, 0U, 1U, 0U},
-        // x = -0x1.de7b9cp-5, exp2m1f(x) = -0x1.4508f4p-5 (RZ)
-        {0xbd6f'3dceU, 0xbd22'847aU, 0U, 1U, 1U},
-    }};
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
-LLVM_LIBC_FUNCTION(float, exp2m1f, (float x)) {
-  using FPBits = fputil::FPBits<float>;
-  FPBits xbits(x);
-
-  uint32_t x_u = xbits.uintval();
-  uint32_t x_abs = x_u & 0x7fff'ffffU;
-
-  // When |x| >= 128, or x is nan, or |x| <= 2^-5
-  if (LIBC_UNLIKELY(x_abs >= 0x4300'0000U || x_abs <= 0x3d00'0000U)) {
-    // |x| <= 2^-5
-    if (x_abs <= 0x3d00'0000U) {
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-      if (auto r = EXP2M1F_EXCEPTS_LO.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
-        return r.value();
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
-      // Minimax polynomial generated by Sollya with:
-      // > display = hexadecimal;
-      // > fpminimax((2^x - 1)/x, 5, [|D...|], [-2^-5, 2^-5]);
-      constexpr double COEFFS[] = {
-          0x1.62e42fefa39f3p-1, 0x1.ebfbdff82c57bp-3,  0x1.c6b08d6f2d7aap-5,
-          0x1.3b2ab6fc92f5dp-7, 0x1.5d897cfe27125p-10, 0x1.43090e61e6af1p-13};
-      double xd = x;
-      double xsq = xd * xd;
-      double c0 = fputil::multiply_add(xd, COEFFS[1], COEFFS[0]);
-      double c1 = fputil::multiply_add(xd, COEFFS[3], COEFFS[2]);
-      double c2 = fputil::multiply_add(xd, COEFFS[5], COEFFS[4]);
-      double p = fputil::polyeval(xsq, c0, c1, c2);
-      return static_cast<float>(p * xd);
-    }
-
-    // x >= 128, or x is nan
-    if (xbits.is_pos()) {
-      if (xbits.is_finite()) {
-        int rounding = fputil::quick_get_round();
-        if (rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO)
-          return FPBits::max_normal().get_val();
-
-        fputil::set_errno_if_required(ERANGE);
-        fputil::raise_except_if_required(FE_OVERFLOW);
-      }
-
-      // x >= 128 and 2^x - 1 rounds to +inf, or x is +inf or nan
-      return x + FPBits::inf().get_val();
-    }
-  }
-
-  if (LIBC_UNLIKELY(x <= -25.0f)) {
-    // 2^(-inf) - 1 = -1
-    if (xbits.is_inf())
-      return -1.0f;
-    // 2^nan - 1 = nan
-    if (xbits.is_nan())
-      return x;
-
-    int rounding = fputil::quick_get_round();
-    if (rounding == FE_UPWARD || rounding == FE_TOWARDZERO)
-      return -0x1.ffff'fep-1f; // -1.0f + 0x1.0p-24f
-
-    fputil::set_errno_if_required(ERANGE);
-    fputil::raise_except_if_required(FE_UNDERFLOW);
-    return -1.0f;
-  }
-
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-  if (auto r = EXP2M1F_EXCEPTS_HI.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
-    return r.value();
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
-  // For -25 < x < 128, to compute 2^x, we perform the following range
-  // reduction: find hi, mid, lo such that:
-  //   x = hi + mid + lo, in which:
-  //     hi is an integer,
-  //     0 <= mid * 2^5 < 32 is an integer,
-  //     -2^(-6) <= lo <= 2^(-6).
-  // In particular,
-  //   hi + mid = round(x * 2^5) * 2^(-5).
-  // Then,
-  //   2^x = 2^(hi + mid + lo) = 2^hi * 2^mid * 2^lo.
-  // 2^mid is stored in the lookup table of 32 elements.
-  // 2^lo is computed using a degree-4 minimax polynomial generated by Sollya.
-  // We perform 2^hi * 2^mid by simply add hi to the exponent field of 2^mid.
-
-  // kf = (hi + mid) * 2^5 = round(x * 2^5)
-  float kf;
-  int k;
-#ifdef LIBC_TARGET_CPU_HAS_NEAREST_INT
-  kf = fputil::nearest_integer(x * 32.0f);
-  k = static_cast<int>(kf);
-#else
-  constexpr float HALF[2] = {0.5f, -0.5f};
-  k = static_cast<int>(fputil::multiply_add(x, 32.0f, HALF[x < 0.0f]));
-  kf = static_cast<float>(k);
-#endif // LIBC_TARGET_CPU_HAS_NEAREST_INT
-
-  // lo = x - (hi + mid) = x - kf * 2^(-5)
-  double lo = fputil::multiply_add(-0x1.0p-5f, kf, x);
-
-  // hi = floor(kf * 2^(-4))
-  // exp2_hi = shift hi to the exponent field of double precision.
-  int64_t exp2_hi =
-      static_cast<int64_t>(static_cast<uint64_t>(k >> ExpBase::MID_BITS)
-                           << fputil::FPBits<double>::FRACTION_LEN);
-  // mh = 2^hi * 2^mid
-  // mh_bits = bit field of mh
-  int64_t mh_bits = ExpBase::EXP_2_MID[k & ExpBase::MID_MASK] + exp2_hi;
-  double mh = fputil::FPBits<double>(static_cast<uint64_t>(mh_bits)).get_val();
-
-  // Degree-4 polynomial approximating (2^x - 1)/x generated by Sollya with:
-  // > display = hexadecimal;
-  // > fpminimax((2^x - 1)/x, 4, [|D...|], [-2^-6, 2^-6]);
-  constexpr double COEFFS[5] = {0x1.62e42fefa39efp-1, 0x1.ebfbdff8131c4p-3,
-                                0x1.c6b08d7061695p-5, 0x1.3b2b1bee74b2ap-7,
-                                0x1.5d88091198529p-10};
-  double lo_sq = lo * lo;
-  double c1 = fputil::multiply_add(lo, COEFFS[0], 1.0);
-  double c2 = fputil::multiply_add(lo, COEFFS[2], COEFFS[1]);
-  double c3 = fputil::multiply_add(lo, COEFFS[4], COEFFS[3]);
-  double exp2_lo = fputil::polyeval(lo_sq, c1, c2, c3);
-  // 2^x - 1 = 2^(hi + mid + lo) - 1
-  //         = 2^(hi + mid) * 2^lo - 1
-  //         ~ mh * (1 + lo * P(lo)) - 1
-  //         = mh * exp2_lo - 1
-  return static_cast<float>(fputil::multiply_add(exp2_lo, mh, -1.0));
-}
+LLVM_LIBC_FUNCTION(float, exp2m1f, (float x)) { return math::exp2m1f(x); }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/exp2m1f16.cpp b/libc/src/math/generic/exp2m1f16.cpp
index ce0cc60748f19..497a2887cea4c 100644
--- a/libc/src/math/generic/exp2m1f16.cpp
+++ b/libc/src/math/generic/exp2m1f16.cpp
@@ -7,163 +7,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/math/exp2m1f16.h"
-#include "hdr/errno_macros.h"
-#include "hdr/fenv_macros.h"
-#include "src/__support/FPUtil/FEnvImpl.h"
-#include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/FPUtil/PolyEval.h"
-#include "src/__support/FPUtil/cast.h"
-#include "src/__support/FPUtil/except_value_utils.h"
-#include "src/__support/FPUtil/multiply_add.h"
-#include "src/__support/FPUtil/rounding_mode.h"
-#include "src/__support/common.h"
-#include "src/__support/macros/config.h"
-#include "src/__support/macros/optimization.h"
-#include "src/__support/macros/properties/cpu_features.h"
-#include "src/__support/math/expxf16_utils.h"
+#include "src/__support/math/exp2m1f16.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-static constexpr fputil::ExceptValues<float16, 6> EXP2M1F16_EXCEPTS_LO = {{
-    // (input, RZ output, RU offset, RD offset, RN offset)
-    // x = 0x1.cf4p-13, exp2m1f16(x) = 0x1.41p-13 (RZ)
-    {0x0b3dU, 0x0904U, 1U, 0U, 1U},
-    // x = 0x1.4fcp-12, exp2m1f16(x) = 0x1.d14p-13 (RZ)
-    {0x0d3fU, 0x0b45U, 1U, 0U, 1U},
-    // x = 0x1.63p-11, exp2m1f16(x) = 0x1.ec4p-12 (RZ)
-    {0x118cU, 0x0fb1U, 1U, 0U, 0U},
-    // x = 0x1.6fp-7, exp2m1f16(x) = 0x1.fe8p-8 (RZ)
-    {0x21bcU, 0x1ffaU, 1U, 0U, 1U},
-    // x = -0x1.c6p-10, exp2m1f16(x) = -0x1.3a8p-10 (RZ)
-    {0x9718U, 0x94eaU, 0U, 1U, 0U},
-    // x = -0x1.cfcp-10, exp2m1f16(x) = -0x1.414p-10 (RZ)
-    {0x973fU, 0x9505U, 0U, 1U, 0U},
-}};
-
-#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT
-static constexpr size_t N_EXP2M1F16_EXCEPTS_HI = 6;
-#else
-static constexpr size_t N_EXP2M1F16_EXCEPTS_HI = 7;
-#endif
-
-static constexpr fputil::ExceptValues<float16, N_EXP2M1F16_EXCEPTS_HI>
-    EXP2M1F16_EXCEPTS_HI = {{
-        // (input, RZ output, RU offset, RD offset, RN offset)
-        // x = 0x1.e58p-3, exp2m1f16(x) = 0x1.6dcp-3 (RZ)
-        {0x3396U, 0x31b7U, 1U, 0U, 0U},
-#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT
-        // x = 0x1.2e8p-2, exp2m1f16(x) = 0x1.d14p-3 (RZ)
-        {0x34baU, 0x3345U, 1U, 0U, 0U},
-#endif
-        // x = 0x1.ad8p-2, exp2m1f16(x) = 0x1.598p-2 (RZ)
-        {0x36b6U, 0x3566U, 1U, 0U, 0U},
-#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT
-        // x = 0x1.edcp-2, exp2m1f16(x) = 0x1.964p-2 (RZ)
-        {0x37b7U, 0x3659U, 1U, 0U, 1U},
-#endif
-        // x = -0x1.804p-3, exp2m1f16(x) = -0x1.f34p-4 (RZ)
-        {0xb201U, 0xafcdU, 0U, 1U, 1U},
-        // x = -0x1.f3p-3, exp2m1f16(x) = -0x1.3e4p-3 (RZ)
-        {0xb3ccU, 0xb0f9U, 0U, 1U, 0U},
-        // x = -0x1.294p-1, exp2m1f16(x) = -0x1.53p-2 (RZ)
-        {0xb8a5U, 0xb54cU, 0U, 1U, 1U},
-#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT
-        // x = -0x1.a34p-1, exp2m1f16(x) = -0x1.bb4p-2 (RZ)
-        {0xba8dU, 0xb6edU, 0U, 1U, 1U},
-#endif
-    }};
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
 LLVM_LIBC_FUNCTION(float16, exp2m1f16, (float16 x)) {
-  using namespace math::expxf16_internal;
-  using FPBits = fputil::FPBits<float16>;
-  FPBits x_bits(x);
-
-  uint16_t x_u = x_bits.uintval();
-  uint16_t x_abs = x_u & 0x7fffU;
-
-  // When |x| <= 2^(-3), or |x| >= 11, or x is NaN.
-  if (LIBC_UNLIKELY(x_abs <= 0x3000U || x_abs >= 0x4980U)) {
-    // exp2m1(NaN) = NaN
-    if (x_bits.is_nan()) {
-      if (x_bits.is_signaling_nan()) {
-        fputil::raise_except_if_required(FE_INVALID);
-        return FPBits::quiet_nan().get_val();
-      }
-
-      return x;
-    }
-
-    // When x >= 16.
-    if (x_u >= 0x4c00 && x_bits.is_pos()) {
-      // exp2m1(+inf) = +inf
-      if (x_bits.is_inf())
-        return FPBits::inf().get_val();
-
-      switch (fputil::quick_get_round()) {
-      case FE_TONEAREST:
-      case FE_UPWARD:
-        fputil::set_errno_if_required(ERANGE);
-        fputil::raise_except_if_required(FE_OVERFLOW | FE_INEXACT);
-        return FPBits::inf().get_val();
-      default:
-        return FPBits::max_normal().get_val();
-      }
-    }
-
-    // When x < -11.
-    if (x_u > 0xc980U) {
-      // exp2m1(-inf) = -1
-      if (x_bits.is_inf())
-        return FPBits::one(Sign::NEG).get_val();
-
-      // When -12 < x < -11, round(2^x - 1, HP, RN) = -0x1.ffcp-1.
-      if (x_u < 0xca00U)
-        return fputil::round_result_slightly_down(
-            fputil::cast<float16>(-0x1.ffcp-1));
-
-      // When x <= -12, round(2^x - 1, HP, RN) = -1.
-      switch (fputil::quick_get_round()) {
-      case FE_TONEAREST:
-      case FE_DOWNWARD:
-        return FPBits::one(Sign::NEG).get_val();
-      default:
-        return fputil::cast<float16>(-0x1.ffcp-1);
-      }
-    }
-
-    // When |x| <= 2^(-3).
-    if (x_abs <= 0x3000U) {
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-      if (auto r = EXP2M1F16_EXCEPTS_LO.lookup(x_u);
-          LIBC_UNLIKELY(r.has_value()))
-        return r.value();
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
-      float xf = x;
-      // Degree-5 minimax polynomial generated by Sollya with the following
-      // commands:
-      //   > display = hexadecimal;
-      //   > P = fpminimax((2^x - 1)/x, 4, [|SG...|], [-2^-3, 2^-3]);
-      //   > x * P;
-      return fputil::cast<float16>(
-          xf * fputil::polyeval(xf, 0x1.62e43p-1f, 0x1.ebfbdep-3f,
-                                0x1.c6af88p-5f, 0x1.3b45d6p-7f,
-                                0x1.641e7cp-10f));
-    }
-  }
-
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-  if (auto r = EXP2M1F16_EXCEPTS_HI.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
-    return r.value();
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
-  // exp2(x) = exp2(hi + mid) * exp2(lo)
-  auto [exp2_hi_mid, exp2_lo] = exp2_range_reduction(x);
-  // exp2m1(x) = exp2(hi + mid) * exp2(lo) - 1
-  return fputil::cast<float16>(
-      fputil::multiply_add(exp2_hi_mid, exp2_lo, -1.0f));
+  return math::exp2m1f16(x);
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/expm1.cpp b/libc/src/math/generic/expm1.cpp
index a3d0c1aa5261c..c410ae0a33a2a 100644
--- a/libc/src/math/generic/expm1.cpp
+++ b/libc/src/math/generic/expm1.cpp
@@ -7,498 +7,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/math/expm1.h"
-#include "src/__support/CPP/bit.h"
-#include "src/__support/FPUtil/FEnvImpl.h"
-#include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/FPUtil/PolyEval.h"
-#include "src/__support/FPUtil/double_double.h"
-#include "src/__support/FPUtil/dyadic_float.h"
-#include "src/__support/FPUtil/except_value_utils.h"
-#include "src/__support/FPUtil/multiply_add.h"
-#include "src/__support/FPUtil/rounding_mode.h"
-#include "src/__support/FPUtil/triple_double.h"
-#include "src/__support/common.h"
-#include "src/__support/integer_literals.h"
-#include "src/__support/macros/config.h"
-#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
-#include "src/__support/math/common_constants.h" // Lookup tables EXP_M1 and EXP_M2.
-#include "src/__support/math/exp_constants.h"
-
-#if ((LIBC_MATH & LIBC_MATH_SKIP_ACCURATE_PASS) != 0)
-#define LIBC_MATH_EXPM1_SKIP_ACCURATE_PASS
-#endif
+#include "src/__support/math/expm1.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
-using fputil::DoubleDouble;
-using fputil::TripleDouble;
-using Float128 = typename fputil::DyadicFloat<128>;
-
-using LIBC_NAMESPACE::operator""_u128;
-
-// log2(e)
-constexpr double LOG2_E = 0x1.71547652b82fep+0;
-
-// Error bounds:
-// Errors when using double precision.
-// 0x1.8p-63;
-constexpr uint64_t ERR_D = 0x3c08000000000000;
-// Errors when using double-double precision.
-// 0x1.0p-99
-[[maybe_unused]] constexpr uint64_t ERR_DD = 0x39c0000000000000;
-
-// -2^-12 * log(2)
-// > a = -2^-12 * log(2);
-// > b = round(a, 30, RN);
-// > c = round(a - b, 30, RN);
-// > d = round(a - b - c, D, RN);
-// Errors < 1.5 * 2^-133
-constexpr double MLOG_2_EXP2_M12_HI = -0x1.62e42ffp-13;
-constexpr double MLOG_2_EXP2_M12_MID = 0x1.718432a1b0e26p-47;
-constexpr double MLOG_2_EXP2_M12_MID_30 = 0x1.718432ap-47;
-constexpr double MLOG_2_EXP2_M12_LO = 0x1.b0e2633fe0685p-79;
-
-namespace {
-
-using namespace common_constants_internal;
-
-// Polynomial approximations with double precision:
-// Return expm1(dx) / x ~ 1 + dx / 2 + dx^2 / 6 + dx^3 / 24.
-// For |dx| < 2^-13 + 2^-30:
-//   | output - expm1(dx) / dx | < 2^-51.
-LIBC_INLINE double poly_approx_d(double dx) {
-  // dx^2
-  double dx2 = dx * dx;
-  // c0 = 1 + dx / 2
-  double c0 = fputil::multiply_add(dx, 0.5, 1.0);
-  // c1 = 1/6 + dx / 24
-  double c1 =
-      fputil::multiply_add(dx, 0x1.5555555555555p-5, 0x1.5555555555555p-3);
-  // p = dx^2 * c1 + c0 = 1 + dx / 2 + dx^2 / 6 + dx^3 / 24
-  double p = fputil::multiply_add(dx2, c1, c0);
-  return p;
-}
-
-// Polynomial approximation with double-double precision:
-// Return expm1(dx) / dx ~ 1 + dx / 2 + dx^2 / 6 + ... + dx^6 / 5040
-// For |dx| < 2^-13 + 2^-30:
-//   | output - expm1(dx) | < 2^-101
-DoubleDouble poly_approx_dd(const DoubleDouble &dx) {
-  // Taylor polynomial.
-  constexpr DoubleDouble COEFFS[] = {
-      {0, 0x1p0},                                      // 1
-      {0, 0x1p-1},                                     // 1/2
-      {0x1.5555555555555p-57, 0x1.5555555555555p-3},   // 1/6
-      {0x1.5555555555555p-59, 0x1.5555555555555p-5},   // 1/24
-      {0x1.1111111111111p-63, 0x1.1111111111111p-7},   // 1/120
-      {-0x1.f49f49f49f49fp-65, 0x1.6c16c16c16c17p-10}, // 1/720
-      {0x1.a01a01a01a01ap-73, 0x1.a01a01a01a01ap-13},  // 1/5040
-  };
-
-  DoubleDouble p = fputil::polyeval(dx, COEFFS[0], COEFFS[1], COEFFS[2],
-                                    COEFFS[3], COEFFS[4], COEFFS[5], COEFFS[6]);
-  return p;
-}
-
-// Polynomial approximation with 128-bit precision:
-// Return (exp(dx) - 1)/dx ~ 1 + dx / 2 + dx^2 / 6 + ... + dx^6 / 5040
-// For |dx| < 2^-13 + 2^-30:
-//   | output - exp(dx) | < 2^-126.
-[[maybe_unused]] Float128 poly_approx_f128(const Float128 &dx) {
-  constexpr Float128 COEFFS_128[]{
-      {Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128}, // 1.0
-      {Sign::POS, -128, 0x80000000'00000000'00000000'00000000_u128}, // 0.5
-      {Sign::POS, -130, 0xaaaaaaaa'aaaaaaaa'aaaaaaaa'aaaaaaab_u128}, // 1/6
-      {Sign::POS, -132, 0xaaaaaaaa'aaaaaaaa'aaaaaaaa'aaaaaaab_u128}, // 1/24
-      {Sign::POS, -134, 0x88888888'88888888'88888888'88888889_u128}, // 1/120
-      {Sign::POS, -137, 0xb60b60b6'0b60b60b'60b60b60'b60b60b6_u128}, // 1/720
-      {Sign::POS, -140, 0xd00d00d0'0d00d00d'00d00d00'd00d00d0_u128}, // 1/5040
-  };
-
-  Float128 p = fputil::polyeval(dx, COEFFS_128[0], COEFFS_128[1], COEFFS_128[2],
-                                COEFFS_128[3], COEFFS_128[4], COEFFS_128[5],
-                                COEFFS_128[6]);
-  return p;
-}
-
-#ifdef DEBUGDEBUG
-std::ostream &operator<<(std::ostream &OS, const Float128 &r) {
-  OS << (r.sign == Sign::NEG ? "-(" : "(") << r.mantissa.val[0] << " + "
-     << r.mantissa.val[1] << " * 2^64) * 2^" << r.exponent << "\n";
-  return OS;
-}
-
-std::ostream &operator<<(std::ostream &OS, const DoubleDouble &r) {
-  OS << std::hexfloat << "(" << r.hi << " + " << r.lo << ")"
-     << std::defaultfloat << "\n";
-  return OS;
-}
-#endif
-
-// Compute exp(x) - 1 using 128-bit precision.
-// TODO(lntue): investigate triple-double precision implementation for this
-// step.
-[[maybe_unused]] Float128 expm1_f128(double x, double kd, int idx1, int idx2) {
-  // Recalculate dx:
-
-  double t1 = fputil::multiply_add(kd, MLOG_2_EXP2_M12_HI, x); // exact
-  double t2 = kd * MLOG_2_EXP2_M12_MID_30;                     // exact
-  double t3 = kd * MLOG_2_EXP2_M12_LO;                         // Error < 2^-133
-
-  Float128 dx = fputil::quick_add(
-      Float128(t1), fputil::quick_add(Float128(t2), Float128(t3)));
-
-  // TODO: Skip recalculating exp_mid1 and exp_mid2.
-  Float128 exp_mid1 =
-      fputil::quick_add(Float128(EXP2_MID1[idx1].hi),
-                        fputil::quick_add(Float128(EXP2_MID1[idx1].mid),
-                                          Float128(EXP2_MID1[idx1].lo)));
-
-  Float128 exp_mid2 =
-      fputil::quick_add(Float128(EXP2_MID2[idx2].hi),
-                        fputil::quick_add(Float128(EXP2_MID2[idx2].mid),
-                                          Float128(EXP2_MID2[idx2].lo)));
-
-  Float128 exp_mid = fputil::quick_mul(exp_mid1, exp_mid2);
-
-  int hi = static_cast<int>(kd) >> 12;
-  Float128 minus_one{Sign::NEG, -127 - hi,
-                     0x80000000'00000000'00000000'00000000_u128};
-
-  Float128 exp_mid_m1 = fputil::quick_add(exp_mid, minus_one);
-
-  Float128 p = poly_approx_f128(dx);
-
-  // r = exp_mid * (1 + dx * P) - 1
-  //   = (exp_mid - 1) + (dx * exp_mid) * P
-  Float128 r =
-      fputil::multiply_add(fputil::quick_mul(exp_mid, dx), p, exp_mid_m1);
-
-  r.exponent += hi;
-
-#ifdef DEBUGDEBUG
-  std::cout << "=== VERY SLOW PASS ===\n"
-            << "        kd: " << kd << "\n"
-            << "        hi: " << hi << "\n"
-            << " minus_one: " << minus_one << "        dx: " << dx
-            << "exp_mid_m1: " << exp_mid_m1 << "   exp_mid: " << exp_mid
-            << "         p: " << p << "         r: " << r << std::endl;
-#endif
-
-  return r;
-}
-
-// Compute exp(x) - 1 with double-double precision.
-DoubleDouble exp_double_double(double x, double kd, const DoubleDouble &exp_mid,
-                               const DoubleDouble &hi_part) {
-  // Recalculate dx:
-  //   dx = x - k * 2^-12 * log(2)
-  double t1 = fputil::multiply_add(kd, MLOG_2_EXP2_M12_HI, x); // exact
-  double t2 = kd * MLOG_2_EXP2_M12_MID_30;                     // exact
-  double t3 = kd * MLOG_2_EXP2_M12_LO;                         // Error < 2^-130
-
-  DoubleDouble dx = fputil::exact_add(t1, t2);
-  dx.lo += t3;
-
-  // Degree-6 Taylor polynomial approximation in double-double precision.
-  // | p - exp(x) | < 2^-100.
-  DoubleDouble p = poly_approx_dd(dx);
-
-  // Error bounds: 2^-99.
-  DoubleDouble r =
-      fputil::multiply_add(fputil::quick_mult(exp_mid, dx), p, hi_part);
-
-#ifdef DEBUGDEBUG
-  std::cout << "=== SLOW PASS ===\n"
-            << "   dx: " << dx << "    p: " << p << "    r: " << r << std::endl;
-#endif
-
-  return r;
-}
-
-// Check for exceptional cases when
-// |x| <= 2^-53 or x < log(2^-54) or x >= 0x1.6232bdd7abcd3p+9
-double set_exceptional(double x) {
-  using FPBits = typename fputil::FPBits<double>;
-  FPBits xbits(x);
-
-  uint64_t x_u = xbits.uintval();
-  uint64_t x_abs = xbits.abs().uintval();
-
-  // |x| <= 2^-53.
-  if (x_abs <= 0x3ca0'0000'0000'0000ULL) {
-    // expm1(x) ~ x.
-
-    if (LIBC_UNLIKELY(x_abs <= 0x0370'0000'0000'0000ULL)) {
-      if (LIBC_UNLIKELY(x_abs == 0))
-        return x;
-      // |x| <= 2^-968, need to scale up a bit before rounding, then scale it
-      // back down.
-      return 0x1.0p-200 * fputil::multiply_add(x, 0x1.0p+200, 0x1.0p-1022);
-    }
-
-    // 2^-968 < |x| <= 2^-53.
-    return fputil::round_result_slightly_up(x);
-  }
-
-  // x < log(2^-54) || x >= 0x1.6232bdd7abcd3p+9 or inf/nan.
-
-  // x < log(2^-54) or -inf/nan
-  if (x_u >= 0xc042'b708'8723'20e2ULL) {
-    // expm1(-Inf) = -1
-    if (xbits.is_inf())
-      return -1.0;
-
-    // exp(nan) = nan
-    if (xbits.is_nan())
-      return x;
-
-    return fputil::round_result_slightly_up(-1.0);
-  }
-
-  // x >= round(log(MAX_NORMAL), D, RU) = 0x1.62e42fefa39fp+9 or +inf/nan
-  // x is finite
-  if (x_u < 0x7ff0'0000'0000'0000ULL) {
-    int rounding = fputil::quick_get_round();
-    if (rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO)
-      return FPBits::max_normal().get_val();
-
-    fputil::set_errno_if_required(ERANGE);
-    fputil::raise_except_if_required(FE_OVERFLOW);
-  }
-  // x is +inf or nan
-  return x + FPBits::inf().get_val();
-}
-
-} // namespace
-
-LLVM_LIBC_FUNCTION(double, expm1, (double x)) {
-  using FPBits = typename fputil::FPBits<double>;
-
-  FPBits xbits(x);
-
-  bool x_is_neg = xbits.is_neg();
-  uint64_t x_u = xbits.uintval();
-
-  // Upper bound: max normal number = 2^1023 * (2 - 2^-52)
-  // > round(log (2^1023 ( 2 - 2^-52 )), D, RU) = 0x1.62e42fefa39fp+9
-  // > round(log (2^1023 ( 2 - 2^-52 )), D, RD) = 0x1.62e42fefa39efp+9
-  // > round(log (2^1023 ( 2 - 2^-52 )), D, RN) = 0x1.62e42fefa39efp+9
-  // > round(exp(0x1.62e42fefa39fp+9), D, RN) = infty
-
-  // Lower bound: log(2^-54) = -0x1.2b708872320e2p5
-  // > round(log(2^-54), D, RN) = -0x1.2b708872320e2p5
-
-  // x < log(2^-54) or x >= 0x1.6232bdd7abcd3p+9 or |x| <= 2^-53.
-
-  if (LIBC_UNLIKELY(x_u >= 0xc042b708872320e2 ||
-                    (x_u <= 0xbca0000000000000 && x_u >= 0x40862e42fefa39f0) ||
-                    x_u <= 0x3ca0000000000000)) {
-    return set_exceptional(x);
-  }
-
-  // Now log(2^-54) <= x <= -2^-53 or 2^-53 <= x < log(2^1023 * (2 - 2^-52))
-
-  // Range reduction:
-  // Let x = log(2) * (hi + mid1 + mid2) + lo
-  // in which:
-  //   hi is an integer
-  //   mid1 * 2^6 is an integer
-  //   mid2 * 2^12 is an integer
-  // then:
-  //   exp(x) = 2^hi * 2^(mid1) * 2^(mid2) * exp(lo).
-  // With this formula:
-  //   - multiplying by 2^hi is exact and cheap, simply by adding the exponent
-  //     field.
-  //   - 2^(mid1) and 2^(mid2) are stored in 2 x 64-element tables.
-  //   - exp(lo) ~ 1 + lo + a0 * lo^2 + ...
-  //
-  // They can be defined by:
-  //   hi + mid1 + mid2 = 2^(-12) * round(2^12 * log_2(e) * x)
-  // If we store L2E = round(log2(e), D, RN), then:
-  //   log2(e) - L2E ~ 1.5 * 2^(-56)
-  // So the errors when computing in double precision is:
-  //   | x * 2^12 * log_2(e) - D(x * 2^12 * L2E) | <=
-  //  <= | x * 2^12 * log_2(e) - x * 2^12 * L2E | +
-  //     + | x * 2^12 * L2E - D(x * 2^12 * L2E) |
-  //  <= 2^12 * ( |x| * 1.5 * 2^-56 + eps(x))  for RN
-  //     2^12 * ( |x| * 1.5 * 2^-56 + 2*eps(x)) for other rounding modes.
-  // So if:
-  //   hi + mid1 + mid2 = 2^(-12) * round(x * 2^12 * L2E) is computed entirely
-  // in double precision, the reduced argument:
-  //   lo = x - log(2) * (hi + mid1 + mid2) is bounded by:
-  //   |lo| <= 2^-13 + (|x| * 1.5 * 2^-56 + 2*eps(x))
-  //         < 2^-13 + (1.5 * 2^9 * 1.5 * 2^-56 + 2*2^(9 - 52))
-  //         < 2^-13 + 2^-41
-  //
-
-  // The following trick computes the round(x * L2E) more efficiently
-  // than using the rounding instructions, with the tradeoff for less accuracy,
-  // and hence a slightly larger range for the reduced argument `lo`.
-  //
-  // To be precise, since |x| < |log(2^-1075)| < 1.5 * 2^9,
-  //   |x * 2^12 * L2E| < 1.5 * 2^9 * 1.5 < 2^23,
-  // So we can fit the rounded result round(x * 2^12 * L2E) in int32_t.
-  // Thus, the goal is to be able to use an additional addition and fixed width
-  // shift to get an int32_t representing round(x * 2^12 * L2E).
-  //
-  // Assuming int32_t using 2-complement representation, since the mantissa part
-  // of a double precision is unsigned with the leading bit hidden, if we add an
-  // extra constant C = 2^e1 + 2^e2 with e1 > e2 >= 2^25 to the product, the
-  // part that are < 2^e2 in resulted mantissa of (x*2^12*L2E + C) can be
-  // considered as a proper 2-complement representations of x*2^12*L2E.
-  //
-  // One small problem with this approach is that the sum (x*2^12*L2E + C) in
-  // double precision is rounded to the least significant bit of the dorminant
-  // factor C.  In order to minimize the rounding errors from this addition, we
-  // want to minimize e1.  Another constraint that we want is that after
-  // shifting the mantissa so that the least significant bit of int32_t
-  // corresponds to the unit bit of (x*2^12*L2E), the sign is correct without
-  // any adjustment.  So combining these 2 requirements, we can choose
-  //   C = 2^33 + 2^32, so that the sign bit corresponds to 2^31 bit, and hence
-  // after right shifting the mantissa, the resulting int32_t has correct sign.
-  // With this choice of C, the number of mantissa bits we need to shift to the
-  // right is: 52 - 33 = 19.
-  //
-  // Moreover, since the integer right shifts are equivalent to rounding down,
-  // we can add an extra 0.5 so that it will become round-to-nearest, tie-to-
-  // +infinity.  So in particular, we can compute:
-  //   hmm = x * 2^12 * L2E + C,
-  // where C = 2^33 + 2^32 + 2^-1, then if
-  //   k = int32_t(lower 51 bits of double(x * 2^12 * L2E + C) >> 19),
-  // the reduced argument:
-  //   lo = x - log(2) * 2^-12 * k is bounded by:
-  //   |lo| <= 2^-13 + 2^-41 + 2^-12*2^-19
-  //         = 2^-13 + 2^-31 + 2^-41.
-  //
-  // Finally, notice that k only uses the mantissa of x * 2^12 * L2E, so the
-  // exponent 2^12 is not needed.  So we can simply define
-  //   C = 2^(33 - 12) + 2^(32 - 12) + 2^(-13 - 12), and
-  //   k = int32_t(lower 51 bits of double(x * L2E + C) >> 19).
-
-  // Rounding errors <= 2^-31 + 2^-41.
-  double tmp = fputil::multiply_add(x, LOG2_E, 0x1.8000'0000'4p21);
-  int k = static_cast<int>(cpp::bit_cast<uint64_t>(tmp) >> 19);
-  double kd = static_cast<double>(k);
-
-  uint32_t idx1 = (k >> 6) & 0x3f;
-  uint32_t idx2 = k & 0x3f;
-  int hi = k >> 12;
-
-  DoubleDouble exp_mid1{EXP2_MID1[idx1].mid, EXP2_MID1[idx1].hi};
-  DoubleDouble exp_mid2{EXP2_MID2[idx2].mid, EXP2_MID2[idx2].hi};
-
-  DoubleDouble exp_mid = fputil::quick_mult(exp_mid1, exp_mid2);
-
-  // -2^(-hi)
-  double one_scaled =
-      FPBits::create_value(Sign::NEG, FPBits::EXP_BIAS - hi, 0).get_val();
-
-  // 2^(mid1 + mid2) - 2^(-hi)
-  DoubleDouble hi_part = x_is_neg ? fputil::exact_add(one_scaled, exp_mid.hi)
-                                  : fputil::exact_add(exp_mid.hi, one_scaled);
-
-  hi_part.lo += exp_mid.lo;
-
-  // |x - (hi + mid1 + mid2) * log(2) - dx| < 2^11 * eps(M_LOG_2_EXP2_M12.lo)
-  //                                        = 2^11 * 2^-13 * 2^-52
-  //                                        = 2^-54.
-  // |dx| < 2^-13 + 2^-30.
-  double lo_h = fputil::multiply_add(kd, MLOG_2_EXP2_M12_HI, x); // exact
-  double dx = fputil::multiply_add(kd, MLOG_2_EXP2_M12_MID, lo_h);
-
-  // We use the degree-4 Taylor polynomial to approximate exp(lo):
-  //   exp(lo) ~ 1 + lo + lo^2 / 2 + lo^3 / 6 + lo^4 / 24 = 1 + lo * P(lo)
-  // So that the errors are bounded by:
-  //   |P(lo) - expm1(lo)/lo| < |lo|^4 / 64 < 2^(-13 * 4) / 64 = 2^-58
-  // Let P_ be an evaluation of P where all intermediate computations are in
-  // double precision.  Using either Horner's or Estrin's schemes, the evaluated
-  // errors can be bounded by:
-  //      |P_(dx) - P(dx)| < 2^-51
-  //   => |dx * P_(dx) - expm1(lo) | < 1.5 * 2^-64
-  //   => 2^(mid1 + mid2) * |dx * P_(dx) - expm1(lo)| < 1.5 * 2^-63.
-  // Since we approximate
-  //   2^(mid1 + mid2) ~ exp_mid.hi + exp_mid.lo,
-  // We use the expression:
-  //    (exp_mid.hi + exp_mid.lo) * (1 + dx * P_(dx)) ~
-  //  ~ exp_mid.hi + (exp_mid.hi * dx * P_(dx) + exp_mid.lo)
-  // with errors bounded by 1.5 * 2^-63.
-
-  // Finally, we have the following approximation formula:
-  //   expm1(x) = 2^hi * 2^(mid1 + mid2) * exp(lo) - 1
-  //            = 2^hi * ( 2^(mid1 + mid2) * exp(lo) - 2^(-hi) )
-  //            ~ 2^hi * ( (exp_mid.hi - 2^-hi) +
-  //                       + (exp_mid.hi * dx * P_(dx) + exp_mid.lo))
-
-  double mid_lo = dx * exp_mid.hi;
-
-  // Approximate expm1(dx)/dx ~ 1 + dx / 2 + dx^2 / 6 + dx^3 / 24.
-  double p = poly_approx_d(dx);
-
-  double lo = fputil::multiply_add(p, mid_lo, hi_part.lo);
-
-  // TODO: The following line leaks encoding abstraction. Use FPBits methods
-  // instead.
-  uint64_t err = x_is_neg ? (static_cast<uint64_t>(-hi) << 52) : 0;
-
-  double err_d = cpp::bit_cast<double>(ERR_D + err);
-
-  double upper = hi_part.hi + (lo + err_d);
-  double lower = hi_part.hi + (lo - err_d);
-
-#ifdef DEBUGDEBUG
-  std::cout << "=== FAST PASS ===\n"
-            << "      x: " << std::hexfloat << x << std::defaultfloat << "\n"
-            << "      k: " << k << "\n"
-            << "   idx1: " << idx1 << "\n"
-            << "   idx2: " << idx2 << "\n"
-            << "     hi: " << hi << "\n"
-            << "     dx: " << std::hexfloat << dx << std::defaultfloat << "\n"
-            << "exp_mid: " << exp_mid << "hi_part: " << hi_part
-            << " mid_lo: " << std::hexfloat << mid_lo << std::defaultfloat
-            << "\n"
-            << "      p: " << std::hexfloat << p << std::defaultfloat << "\n"
-            << "     lo: " << std::hexfloat << lo << std::defaultfloat << "\n"
-            << "  upper: " << std::hexfloat << upper << std::defaultfloat
-            << "\n"
-            << "  lower: " << std::hexfloat << lower << std::defaultfloat
-            << "\n"
-            << std::endl;
-#endif
-
-  if (LIBC_LIKELY(upper == lower)) {
-    // to multiply by 2^hi, a fast way is to simply add hi to the exponent
-    // field.
-    int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
-    double r = cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(upper));
-    return r;
-  }
-
-  // Use double-double
-  DoubleDouble r_dd = exp_double_double(x, kd, exp_mid, hi_part);
-
-#ifdef LIBC_MATH_EXPM1_SKIP_ACCURATE_PASS
-  int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
-  double r =
-      cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(r_dd.hi + r_dd.lo));
-  return r;
-#else
-  double err_dd = cpp::bit_cast<double>(ERR_DD + err);
-
-  double upper_dd = r_dd.hi + (r_dd.lo + err_dd);
-  double lower_dd = r_dd.hi + (r_dd.lo - err_dd);
-
-  if (LIBC_LIKELY(upper_dd == lower_dd)) {
-    int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
-    double r = cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(upper_dd));
-    return r;
-  }
-
-  // Use 128-bit precision
-  Float128 r_f128 = expm1_f128(x, kd, idx1, idx2);
-
-  return static_cast<double>(r_f128);
-#endif // LIBC_MATH_EXPM1_SKIP_ACCURATE_PASS
-}
+LLVM_LIBC_FUNCTION(double, expm1, (double x)) { return math::expm1(x); }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/expm1f.cpp b/libc/src/math/generic/expm1f.cpp
index 72c8aa358d618..60d3bfe814963 100644
--- a/libc/src/math/generic/expm1f.cpp
+++ b/libc/src/math/generic/expm1f.cpp
@@ -7,168 +7,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/math/expm1f.h"
-#include "src/__support/FPUtil/BasicOperations.h"
-#include "src/__support/FPUtil/FEnvImpl.h"
-#include "src/__support/FPUtil/FMA.h"
-#include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/FPUtil/PolyEval.h"
-#include "src/__support/FPUtil/multiply_add.h"
-#include "src/__support/FPUtil/nearest_integer.h"
-#include "src/__support/FPUtil/rounding_mode.h"
-#include "src/__support/common.h"
-#include "src/__support/macros/config.h"
-#include "src/__support/macros/optimization.h"            // LIBC_UNLIKELY
-#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
-#include "src/__support/math/common_constants.h" // Lookup tables EXP_M1 and EXP_M2.
+#include "src/__support/math/expm1f.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
-LLVM_LIBC_FUNCTION(float, expm1f, (float x)) {
-  using namespace common_constants_internal;
-  using FPBits = typename fputil::FPBits<float>;
-  FPBits xbits(x);
-
-  uint32_t x_u = xbits.uintval();
-  uint32_t x_abs = x_u & 0x7fff'ffffU;
-
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-  // Exceptional value
-  if (LIBC_UNLIKELY(x_u == 0x3e35'bec5U)) { // x = 0x1.6b7d8ap-3f
-    int round_mode = fputil::quick_get_round();
-    if (round_mode == FE_TONEAREST || round_mode == FE_UPWARD)
-      return 0x1.8dbe64p-3f;
-    return 0x1.8dbe62p-3f;
-  }
-#if !defined(LIBC_TARGET_CPU_HAS_FMA_DOUBLE)
-  if (LIBC_UNLIKELY(x_u == 0xbdc1'c6cbU)) { // x = -0x1.838d96p-4f
-    int round_mode = fputil::quick_get_round();
-    if (round_mode == FE_TONEAREST || round_mode == FE_DOWNWARD)
-      return -0x1.71c884p-4f;
-    return -0x1.71c882p-4f;
-  }
-#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
-  // When |x| > 25*log(2), or nan
-  if (LIBC_UNLIKELY(x_abs >= 0x418a'a123U)) {
-    // x < log(2^-25)
-    if (xbits.is_neg()) {
-      // exp(-Inf) = 0
-      if (xbits.is_inf())
-        return -1.0f;
-      // exp(nan) = nan
-      if (xbits.is_nan())
-        return x;
-      int round_mode = fputil::quick_get_round();
-      if (round_mode == FE_UPWARD || round_mode == FE_TOWARDZERO)
-        return -0x1.ffff'fep-1f; // -1.0f + 0x1.0p-24f
-      return -1.0f;
-    } else {
-      // x >= 89 or nan
-      if (xbits.uintval() >= 0x42b2'0000) {
-        if (xbits.uintval() < 0x7f80'0000U) {
-          int rounding = fputil::quick_get_round();
-          if (rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO)
-            return FPBits::max_normal().get_val();
-
-          fputil::set_errno_if_required(ERANGE);
-          fputil::raise_except_if_required(FE_OVERFLOW);
-        }
-        return x + FPBits::inf().get_val();
-      }
-    }
-  }
-
-  // |x| < 2^-4
-  if (x_abs < 0x3d80'0000U) {
-    // |x| < 2^-25
-    if (x_abs < 0x3300'0000U) {
-      // x = -0.0f
-      if (LIBC_UNLIKELY(xbits.uintval() == 0x8000'0000U))
-        return x;
-        // When |x| < 2^-25, the relative error of the approximation e^x - 1 ~ x
-        // is:
-        //   |(e^x - 1) - x| / |e^x - 1| < |x^2| / |x|
-        //                               = |x|
-        //                               < 2^-25
-        //                               < epsilon(1)/2.
-        // So the correctly rounded values of expm1(x) are:
-        //   = x + eps(x) if rounding mode = FE_UPWARD,
-        //                   or (rounding mode = FE_TOWARDZERO and x is
-        //                   negative),
-        //   = x otherwise.
-        // To simplify the rounding decision and make it more efficient, we use
-        //   fma(x, x, x) ~ x + x^2 instead.
-        // Note: to use the formula x + x^2 to decide the correct rounding, we
-        // do need fma(x, x, x) to prevent underflow caused by x*x when |x| <
-        // 2^-76. For targets without FMA instructions, we simply use double for
-        // intermediate results as it is more efficient than using an emulated
-        // version of FMA.
-#if defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT)
-      return fputil::multiply_add(x, x, x);
-#else
-      double xd = x;
-      return static_cast<float>(fputil::multiply_add(xd, xd, xd));
-#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT
-    }
-
-    constexpr double COEFFS[] = {0x1p-1,
-                                 0x1.55555555557ddp-3,
-                                 0x1.55555555552fap-5,
-                                 0x1.111110fcd58b7p-7,
-                                 0x1.6c16c1717660bp-10,
-                                 0x1.a0241f0006d62p-13,
-                                 0x1.a01e3f8d3c06p-16};
-
-    // 2^-25 <= |x| < 2^-4
-    double xd = static_cast<double>(x);
-    double xsq = xd * xd;
-    // Degree-8 minimax polynomial generated by Sollya with:
-    // > display = hexadecimal;
-    // > P = fpminimax((expm1(x) - x)/x^2, 6, [|D...|], [-2^-4, 2^-4]);
-
-    double c0 = fputil::multiply_add(xd, COEFFS[1], COEFFS[0]);
-    double c1 = fputil::multiply_add(xd, COEFFS[3], COEFFS[2]);
-    double c2 = fputil::multiply_add(xd, COEFFS[5], COEFFS[4]);
-
-    double r = fputil::polyeval(xsq, c0, c1, c2, COEFFS[6]);
-    return static_cast<float>(fputil::multiply_add(r, xsq, xd));
-  }
-
-  // For -18 < x < 89, to compute expm1(x), we perform the following range
-  // reduction: find hi, mid, lo such that:
-  //   x = hi + mid + lo, in which
-  //     hi is an integer,
-  //     mid * 2^7 is an integer
-  //     -2^(-8) <= lo < 2^-8.
-  // In particular,
-  //   hi + mid = round(x * 2^7) * 2^(-7).
-  // Then,
-  //   expm1(x) = exp(hi + mid + lo) - 1 = exp(hi) * exp(mid) * exp(lo) - 1.
-  // We store exp(hi) and exp(mid) in the lookup tables EXP_M1 and EXP_M2
-  // respectively.  exp(lo) is computed using a degree-4 minimax polynomial
-  // generated by Sollya.
-
-  // x_hi = hi + mid.
-  float kf = fputil::nearest_integer(x * 0x1.0p7f);
-  int x_hi = static_cast<int>(kf);
-  // Subtract (hi + mid) from x to get lo.
-  double xd = static_cast<double>(fputil::multiply_add(kf, -0x1.0p-7f, x));
-  x_hi += 104 << 7;
-  // hi = x_hi >> 7
-  double exp_hi = EXP_M1[x_hi >> 7];
-  // lo = x_hi & 0x0000'007fU;
-  double exp_mid = EXP_M2[x_hi & 0x7f];
-  double exp_hi_mid = exp_hi * exp_mid;
-  // Degree-4 minimax polynomial generated by Sollya with the following
-  // commands:
-  //   > display = hexadecimal;
-  //   > Q = fpminimax(expm1(x)/x, 3, [|D...|], [-2^-8, 2^-8]);
-  //   > Q;
-  double exp_lo =
-      fputil::polyeval(xd, 0x1.0p0, 0x1.ffffffffff777p-1, 0x1.000000000071cp-1,
-                       0x1.555566668e5e7p-3, 0x1.55555555ef243p-5);
-  return static_cast<float>(fputil::multiply_add(exp_hi_mid, exp_lo, -1.0));
-}
+LLVM_LIBC_FUNCTION(float, expm1f, (float x)) { return math::expm1f(x); }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/expm1f16.cpp b/libc/src/math/generic/expm1f16.cpp
index c2231f0aca715..68bf21df1721e 100644
--- a/libc/src/math/generic/expm1f16.cpp
+++ b/libc/src/math/generic/expm1f16.cpp
@@ -7,135 +7,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/math/expm1f16.h"
-#include "hdr/errno_macros.h"
-#include "hdr/fenv_macros.h"
-#include "src/__support/FPUtil/FEnvImpl.h"
-#include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/FPUtil/PolyEval.h"
-#include "src/__support/FPUtil/cast.h"
-#include "src/__support/FPUtil/except_value_utils.h"
-#include "src/__support/FPUtil/multiply_add.h"
-#include "src/__support/FPUtil/rounding_mode.h"
-#include "src/__support/common.h"
-#include "src/__support/macros/config.h"
-#include "src/__support/macros/optimization.h"
-#include "src/__support/math/expxf16_utils.h"
+#include "src/__support/math/expm1f16.h"
 
 namespace LIBC_NAMESPACE_DECL {
-
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-static constexpr fputil::ExceptValues<float16, 1> EXPM1F16_EXCEPTS_LO = {{
-    // (input, RZ output, RU offset, RD offset, RN offset)
-    // x = 0x1.564p-5, expm1f16(x) = 0x1.5d4p-5 (RZ)
-    {0x2959U, 0x2975U, 1U, 0U, 1U},
-}};
-
-#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT
-static constexpr size_t N_EXPM1F16_EXCEPTS_HI = 2;
-#else
-static constexpr size_t N_EXPM1F16_EXCEPTS_HI = 3;
-#endif
-
-static constexpr fputil::ExceptValues<float16, N_EXPM1F16_EXCEPTS_HI>
-    EXPM1F16_EXCEPTS_HI = {{
-        // (input, RZ output, RU offset, RD offset, RN offset)
-        // x = 0x1.c34p+0, expm1f16(x) = 0x1.34cp+2 (RZ)
-        {0x3f0dU, 0x44d3U, 1U, 0U, 1U},
-        // x = -0x1.e28p-3, expm1f16(x) = -0x1.adcp-3 (RZ)
-        {0xb38aU, 0xb2b7U, 0U, 1U, 1U},
-#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT
-        // x = 0x1.a08p-3, exp10m1f(x) = 0x1.cdcp-3 (RZ)
-        {0x3282U, 0x3337U, 1U, 0U, 0U},
-#endif
-    }};
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
-LLVM_LIBC_FUNCTION(float16, expm1f16, (float16 x)) {
-  using namespace math::expxf16_internal;
-  using FPBits = fputil::FPBits<float16>;
-  FPBits x_bits(x);
-
-  uint16_t x_u = x_bits.uintval();
-  uint16_t x_abs = x_u & 0x7fffU;
-
-  // When |x| <= 2^(-3), or |x| >= -11 * log(2), or x is NaN.
-  if (LIBC_UNLIKELY(x_abs <= 0x3000U || x_abs >= 0x47a0U)) {
-    // expm1(NaN) = NaN
-    if (x_bits.is_nan()) {
-      if (x_bits.is_signaling_nan()) {
-        fputil::raise_except_if_required(FE_INVALID);
-        return FPBits::quiet_nan().get_val();
-      }
-
-      return x;
-    }
-
-    // expm1(+/-0) = +/-0
-    if (x_abs == 0)
-      return x;
-
-    // When x >= 16 * log(2).
-    if (x_bits.is_pos() && x_abs >= 0x498cU) {
-      // expm1(+inf) = +inf
-      if (x_bits.is_inf())
-        return FPBits::inf().get_val();
-
-      switch (fputil::quick_get_round()) {
-      case FE_TONEAREST:
-      case FE_UPWARD:
-        fputil::set_errno_if_required(ERANGE);
-        fputil::raise_except_if_required(FE_OVERFLOW | FE_INEXACT);
-        return FPBits::inf().get_val();
-      default:
-        return FPBits::max_normal().get_val();
-      }
-    }
-
-    // When x <= -11 * log(2).
-    if (x_u >= 0xc7a0U) {
-      // expm1(-inf) = -1
-      if (x_bits.is_inf())
-        return FPBits::one(Sign::NEG).get_val();
-
-      // When x > -0x1.0ap+3, round(expm1(x), HP, RN) = -1.
-      if (x_u > 0xc828U)
-        return fputil::round_result_slightly_up(
-            FPBits::one(Sign::NEG).get_val());
-      // When x <= -0x1.0ap+3, round(expm1(x), HP, RN) = -0x1.ffcp-1.
-      return fputil::round_result_slightly_down(
-          fputil::cast<float16>(-0x1.ffcp-1));
-    }
-
-    // When 0 < |x| <= 2^(-3).
-    if (x_abs <= 0x3000U && !x_bits.is_zero()) {
-
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-      if (auto r = EXPM1F16_EXCEPTS_LO.lookup(x_u);
-          LIBC_UNLIKELY(r.has_value()))
-        return r.value();
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
-      float xf = x;
-      // Degree-5 minimax polynomial generated by Sollya with the following
-      // commands:
-      //   > display = hexadecimal;
-      //   > P = fpminimax(expm1(x)/x, 4, [|SG...|], [-2^-3, 2^-3]);
-      //   > x * P;
-      return fputil::cast<float16>(
-          xf * fputil::polyeval(xf, 0x1p+0f, 0x1.fffff8p-2f, 0x1.555556p-3f,
-                                0x1.55905ep-5f, 0x1.1124c2p-7f));
-    }
-  }
-
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-  if (auto r = EXPM1F16_EXCEPTS_HI.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
-    return r.value();
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
-  // exp(x) = exp(hi + mid) * exp(lo)
-  auto [exp_hi_mid, exp_lo] = exp_range_reduction(x);
-  // expm1(x) = exp(hi + mid) * exp(lo) - 1
-  return fputil::cast<float16>(fputil::multiply_add(exp_hi_mid, exp_lo, -1.0f));
-}
+LLVM_LIBC_FUNCTION(float16, expm1f16, (float16 x)) { return math::expm1f16(x); }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fma.cpp b/libc/src/math/generic/fma.cpp
index 2ea4ae9961150..3ccdb78846e34 100644
--- a/libc/src/math/generic/fma.cpp
+++ b/libc/src/math/generic/fma.cpp
@@ -7,15 +7,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/math/fma.h"
-#include "src/__support/common.h"
-
-#include "src/__support/FPUtil/FMA.h"
-#include "src/__support/macros/config.h"
+#include "src/__support/math/fma.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(double, fma, (double x, double y, double z)) {
-  return fputil::fma<double>(x, y, z);
+  return math::fma(x, y, z);
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/shared/CMakeLists.txt b/libc/test/shared/CMakeLists.txt
index 8d81199f19c83..cd4b5ec75f876 100644
--- a/libc/test/shared/CMakeLists.txt
+++ b/libc/test/shared/CMakeLists.txt
@@ -43,11 +43,17 @@ add_fp_unittest(
     libc.src.__support.math.exp2
     libc.src.__support.math.exp2f
     libc.src.__support.math.exp2f16
+    libc.src.__support.math.exp2m1f
+    libc.src.__support.math.exp2m1f16
+    libc.src.__support.math.expm1
+    libc.src.__support.math.expm1f
+    libc.src.__support.math.expm1f16
     libc.src.__support.math.exp10
     libc.src.__support.math.exp10f
     libc.src.__support.math.exp10f16
     libc.src.__support.math.expf
     libc.src.__support.math.expf16
+    libc.src.__support.math.fma
     libc.src.__support.math.frexpf
     libc.src.__support.math.frexpf128
     libc.src.__support.math.frexpf16
diff --git a/libc/test/shared/shared_math_test.cpp b/libc/test/shared/shared_math_test.cpp
index 84787d5e02a3a..7357e24603004 100644
--- a/libc/test/shared/shared_math_test.cpp
+++ b/libc/test/shared/shared_math_test.cpp
@@ -29,7 +29,9 @@ TEST(LlvmLibcSharedMathTest, AllFloat16) {
   EXPECT_FP_EQ(0x1p+0f16, LIBC_NAMESPACE::shared::exp10f16(0.0f16));
   EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::exp10m1f16(0.0f16));
   EXPECT_FP_EQ(0x1p+0f16, LIBC_NAMESPACE::shared::exp2f16(0.0f16));
+  EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::exp2m1f16(0.0f16));
   EXPECT_FP_EQ(0x1p+0f16, LIBC_NAMESPACE::shared::expf16(0.0f16));
+  EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::expm1f16(0.0f16));
 
   ASSERT_FP_EQ(float16(8 << 5), LIBC_NAMESPACE::shared::ldexpf16(8.0f16, 5));
   ASSERT_FP_EQ(float16(-1 * (8 << 5)),
@@ -61,8 +63,10 @@ TEST(LlvmLibcSharedMathTest, AllFloat) {
   EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::exp10m1f(0.0f));
   EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::erff(0.0f));
   EXPECT_FP_EQ(0x1p+0f, LIBC_NAMESPACE::shared::exp10f(0.0f));
+  EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::exp2m1f(0.0f));
   EXPECT_FP_EQ(0x1p+0f, LIBC_NAMESPACE::shared::expf(0.0f));
   EXPECT_FP_EQ(0x1p+0f, LIBC_NAMESPACE::shared::exp2f(0.0f));
+  EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::expm1f(0.0f));
 
   EXPECT_FP_EQ_ALL_ROUNDING(0.75f,
                             LIBC_NAMESPACE::shared::frexpf(24.0f, &exponent));
@@ -85,6 +89,8 @@ TEST(LlvmLibcSharedMathTest, AllDouble) {
   EXPECT_FP_EQ(0x1p+0, LIBC_NAMESPACE::shared::exp(0.0));
   EXPECT_FP_EQ(0x1p+0, LIBC_NAMESPACE::shared::exp2(0.0));
   EXPECT_FP_EQ(0x1p+0, LIBC_NAMESPACE::shared::exp10(0.0));
+  EXPECT_FP_EQ(0x0p+0, LIBC_NAMESPACE::shared::expm1(0.0));
+  EXPECT_FP_EQ(10.0, LIBC_NAMESPACE::shared::fma(2.0, 3.0, 4.0));
 }
 
 #ifdef LIBC_TYPES_HAS_FLOAT128
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index c7e3aa692b1fb..1902b43216a7c 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -2791,6 +2791,14 @@ libc_support_library(
     ],
 )
 
+libc_support_library(
+    name = "__support_math_fma",
+    hdrs = ["src/__support/math/fma.h"],
+    deps = [
+        ":__support_fputil_fma",
+    ],
+)
+
 libc_support_library(
     name = "__support_math_frexpf128",
     hdrs = ["src/__support/math/frexpf128.h"],
@@ -2947,6 +2955,38 @@ libc_support_library(
     ],
 )
 
+libc_support_library(
+    name = "__support_math_exp2m1f",
+    hdrs = ["src/__support/math/exp2m1f.h"],
+    deps = [
+        ":__support_fputil_except_value_utils",
+        ":__support_fputil_fma",
+        ":__support_fputil_multiply_add",
+        ":__support_fputil_nearest_integer",
+        ":__support_fputil_polyeval",
+        ":__support_fputil_rounding_mode",
+        ":__support_macros_optimization",
+        ":__support_math_common_constants",
+        ":__support_math_exp10f_utils",
+    ],
+)
+
+libc_support_library(
+    name = "__support_math_exp2m1f16",
+    hdrs = ["src/__support/math/exp2m1f16.h"],
+    deps = [
+        ":__support_fputil_except_value_utils",
+        ":__support_fputil_fma",
+        ":__support_fputil_multiply_add",
+        ":__support_fputil_nearest_integer",
+        ":__support_fputil_polyeval",
+        ":__support_fputil_rounding_mode",
+        ":__support_macros_optimization",
+        ":__support_math_common_constants",
+        ":__support_math_expxf16_utils",
+    ],
+)
+
 libc_support_library(
     name = "__support_math_exp10",
     hdrs = ["src/__support/math/exp10.h"],
@@ -3024,6 +3064,55 @@ libc_support_library(
     ],
 )
 
+libc_support_library(
+    name = "__support_math_expm1",
+    hdrs = ["src/__support/math/expm1.h"],
+    deps = [
+        ":__support_fputil_double_double",
+        ":__support_fputil_dyadic_float",
+        ":__support_fputil_except_value_utils",
+        ":__support_fputil_multiply_add",
+        ":__support_fputil_polyeval",
+        ":__support_fputil_rounding_mode",
+        ":__support_fputil_triple_double",
+        ":__support_integer_literals",
+        ":__support_macros_optimization",
+        ":__support_math_common_constants",
+        ":__support_math_exp_constants",
+    ],
+)
+
+libc_support_library(
+    name = "__support_math_expm1f",
+    hdrs = ["src/__support/math/expm1f.h"],
+    deps = [
+        ":__support_fputil_fma",
+        ":__support_fputil_multiply_add",
+        ":__support_fputil_nearest_integer",
+        ":__support_fputil_polyeval",
+        ":__support_fputil_rounding_mode",
+        ":__support_macros_optimization",
+        ":__support_macros_properties_cpu_features",
+        ":__support_math_common_constants",
+    ],
+)
+
+libc_support_library(
+    name = "__support_math_expm1f16",
+    hdrs = ["src/__support/math/expm1f16.h"],
+    deps = [
+        ":__support_fputil_except_value_utils",
+        ":__support_fputil_fma",
+        ":__support_fputil_multiply_add",
+        ":__support_fputil_nearest_integer",
+        ":__support_fputil_polyeval",
+        ":__support_fputil_rounding_mode",
+        ":__support_macros_optimization",
+        ":__support_macros_properties_cpu_features",
+        ":__support_math_expxf16_utils",
+    ],
+)
+
 libc_support_library(
     name = "__support_range_reduction_double",
     hdrs = [
@@ -3734,51 +3823,35 @@ libc_math_function(
 libc_math_function(
     name = "exp2m1f",
     additional_deps = [
-        ":__support_fputil_polyeval",
-        ":__support_math_exp10f_utils",
+        ":__support_math_exp2m1f",
     ],
 )
 
 libc_math_function(
     name = "exp2m1f16",
     additional_deps = [
-        ":__support_math_expxf16_utils",
+        ":__support_math_exp2m1f16",
     ],
 )
 
 libc_math_function(
     name = "expm1",
     additional_deps = [
-        ":__support_fputil_double_double",
-        ":__support_fputil_dyadic_float",
-        ":__support_fputil_multiply_add",
-        ":__support_fputil_polyeval",
-        ":__support_fputil_rounding_mode",
-        ":__support_fputil_triple_double",
-        ":__support_integer_literals",
-        ":__support_macros_optimization",
-        ":__support_math_common_constants",
+        ":__support_math_expm1",
     ],
 )
 
 libc_math_function(
     name = "expm1f",
     additional_deps = [
-        ":__support_fputil_fma",
-        ":__support_fputil_multiply_add",
-        ":__support_fputil_nearest_integer",
-        ":__support_fputil_polyeval",
-        ":__support_fputil_rounding_mode",
-        ":__support_macros_optimization",
-        ":__support_macros_properties_cpu_features",
-        ":__support_math_common_constants",
+        ":__support_math_expm1f",
     ],
 )
 
 libc_math_function(
     name = "expm1f16",
     additional_deps = [
-        ":__support_math_expxf16_utils",
+        ":__support_math_expm1f16",
     ],
 )
 
@@ -3936,7 +4009,7 @@ libc_math_function(name = "floorf16")
 libc_math_function(
     name = "fma",
     additional_deps = [
-        ":__support_fputil_fma",
+        ":__support_math_fma",
     ],
 )