Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
213 changes: 179 additions & 34 deletions glm/gtx/fast_exponential.inl
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,154 @@

namespace glm
{
// fastPow:
template<>
GLM_FUNC_QUALIFIER float fastExp2(float x)
{
//approximated using "exp2(x)" formula
//for +x = exp2(x)
//for -x = 1/exp2(abs(x))
float mx, out, C0, C1, C2, C3, C4, C5;
bool reciprocal;
uint32_t exponent, tmpx;
// coefficients interval [0, 1]
C0 = 0.00189646114543331e-00f;
C1 = 0.00894282898410912e-00f;
C2 = 0.05586624630452070e-00f;
C3 = 0.24013971109076948e-00f;
C4 = 0.69315475247516734e-00f;
C5 = 0.99999989311082671e-00f;
mx = x;
reciprocal = (*(uint32_t*)&x) & 0x80000000;
tmpx = (*(uint32_t*)&x) & 0x7FFFFFFF;
mx = *(float*)&tmpx;
exponent = (uint32_t)mx;
mx = mx - (float)exponent;
out = (((((C0 * mx + C1) * mx + C2) * mx + C3) * mx + C4) * mx + C5);
tmpx = (exponent + 127) << 23;
out = (*(float*)&tmpx) * out;
out = reciprocal ? (1.0f / out) : out;
return out;
}

template<>
GLM_FUNC_QUALIFIER double fastExp2(double x)
{
//approximated using "exp2(x)" formula
//for +x = exp2(x)
//for -x = 1/exp2(abs(x))
double mx, out, C0, C1, C2, C3, C4, C5;
bool reciprocal;
uint64_t exponent, tmpx;
//coefficients interval [0, 1]
C0 = 0.00189646114543331e-00;
C1 = 0.00894282898410912e-00;
C2 = 0.05586624630452070e-00;
C3 = 0.24013971109076948e-00;
C4 = 0.69315475247516734e-00;
C5 = 0.99999989311082671e-00;
mx = x;
reciprocal = (*(uint64_t*)&x) & 0x8000000000000000;
tmpx = (*(uint64_t*)&x) & 0x7FFFFFFFFFFFFFFF;
mx = *(double*)&tmpx;
exponent = (uint64_t)mx;
mx = mx - (double)exponent;
out = (((((C0 * mx + C1) * mx + C2) * mx + C3) * mx + C4) * mx + C5);
tmpx = (exponent + 1023) << 52;
out = (*(double*)&tmpx) * out;
out = reciprocal ? (1.0 / out) : out;
return out;
}

template<>
GLM_FUNC_QUALIFIER float fastLog2(float x)
{
//approximated using this formula "log2(x)"
//interval [1, 1.5]
//for x mantissa <= 1.5 log2(x) ≈ C0 * x + C1 * x2...
//for x mantissa >= 1.5 log2(x) ≈ log2(x / 1.5) + log2(1.5)
//for x <= 1.0 = -log2(1/x)
float mx, l2, inv_three_half, lx, out, poly, C0, C1, C2, C3, C4, C5, C6;
bool low, low_mantissa;
uint32_t tmpx;
// coefficients
C0 = -0.067508561412635e-00f;
C1 = 0.605786644896372e-00f;
C2 = -2.351461115180550e-00f;
C3 = 5.175006419193522e-00f;
C4 = -7.182524695365589e-00f;
C5 = 7.064678543395325e-00f;
C6 = -3.243977190921664e-00f;
// constants
l2 = 0.5849625f; //log2(1.5)
inv_three_half = 0.6666667f; // 1.0 / 1.5
mx = x;
low = mx < 1.0f;
mx = low ? (1.0f / mx) : mx;
tmpx = 1065353216 | ((*(uint32_t*)&mx) & 0x007FFFFF);
lx = *(float*)&tmpx;
low_mantissa = lx <= 1.5f;
lx = low_mantissa ? lx : (lx * inv_three_half);
poly = ((((((C0 * lx + C1) * lx + C2) * lx + C3) * lx + C4) * lx + C5) * lx + C6);
poly = low_mantissa ? poly : (poly + l2);
out = (((*(uint32_t*)&mx) >> 23) - 127) + poly;
out = low ? -out : out;
return out;
}

template<>
GLM_FUNC_QUALIFIER double fastLog2(double x)
{
//approximated using this formula "log2(x)"
//interval [1, 1.5]
//for x mantissa <= 1.5 log2(x) ≈ C0 * x + C1 * x2...
//for x mantissa >= 1.5 log2(x) ≈ log2(x / 1.5) + log2(1.5)
//for x <= 1.0 = -log2(1/x)
double mx, l2, inv_three_half, lx, out, poly, C0, C1, C2, C3, C4, C5, C6;
bool low, low_mantissa;
uint64_t tmpx;
// coefficients
C0 = -0.067508561412635e-00;
C1 = 0.605786644896372e-00;
C2 = -2.351461115180550e-00;
C3 = 5.175006419193522e-00;
C4 = -7.182524695365589e-00;
C5 = 7.064678543395325e-00;
C6 = -3.243977190921664e-00;
// constants
l2 = 0.5849625; // log2(1.5)
inv_three_half = 0.6666667; // 1.0 / 1.5
mx = x;
low = mx < 1.0;
mx = low ? (1.0 / mx) : mx;
tmpx = 4607182418800017408U | ((*(uint64_t*)&mx) & 0x000FFFFFFFFFFFFF);
lx = *(double*)&tmpx;
low_mantissa = lx <= 1.5;
lx = low_mantissa ? lx : (lx * inv_three_half);
poly = ((((((C0 * lx + C1) * lx + C2) * lx + C3) * lx + C4) * lx + C5) * lx + C6);
poly = low_mantissa ? poly : (poly + l2);
out = (((*(uint64_t*)&mx) >> 52) - 1023) + poly;
out = low ? -out : out;
return out;
}

// fastPow
template<typename genType>
GLM_FUNC_QUALIFIER genType fastPow(genType x, genType y)
{
return exp(y * log(x));
}

template<>
GLM_FUNC_QUALIFIER float fastPow(float x, float y)
{
return fastExp2<float>(y * fastLog2<float>(x));
}

template<>
GLM_FUNC_QUALIFIER double fastPow(double x, double y)
{
return fastExp2<double>(y * fastLog2<double>(x));
}

template<length_t L, typename T, qualifier Q>
GLM_FUNC_QUALIFIER vec<L, T, Q> fastPow(vec<L, T, Q> const& x, vec<L, T, Q> const& y)
Expand All @@ -34,17 +176,16 @@ namespace glm
}

// fastExp
// Note: This function provides accurate results only for value between -1 and 1, else avoid it.
template<typename T>
GLM_FUNC_QUALIFIER T fastExp(T x)
template<>
GLM_FUNC_QUALIFIER float fastExp(float x)
{
// This has a better looking and same performance in release mode than the following code. However, in debug mode it's slower.
// return 1.0f + x * (1.0f + x * 0.5f * (1.0f + x * 0.3333333333f * (1.0f + x * 0.25 * (1.0f + x * 0.2f))));
T x2 = x * x;
T x3 = x2 * x;
T x4 = x3 * x;
T x5 = x4 * x;
return T(1) + x + (x2 * T(0.5)) + (x3 * T(0.1666666667)) + (x4 * T(0.041666667)) + (x5 * T(0.008333333333));
return fastExp2<float>(1.4426950409f * x);
}

template<>
GLM_FUNC_QUALIFIER double fastExp(double x)
{
return fastExp2<double>(1.4426950409 * x);
}
/* // Try to handle all values of float... but often shower than std::exp, glm::floor and the loop kill the performance
GLM_FUNC_QUALIFIER float fastExp(float x)
Expand Down Expand Up @@ -86,28 +227,6 @@ namespace glm
return detail::functor1<vec, L, T, T, Q>::call(fastExp, x);
}

// fastLog
template<typename genType>
GLM_FUNC_QUALIFIER genType fastLog(genType x)
{
return std::log(x);
}

/* Slower than the VC7.1 function...
GLM_FUNC_QUALIFIER float fastLog(float x)
{
float y1 = (x - 1.0f) / (x + 1.0f);
float y2 = y1 * y1;
return 2.0f * y1 * (1.0f + y2 * (0.3333333333f + y2 * (0.2f + y2 * 0.1428571429f)));
}
*/

template<length_t L, typename T, qualifier Q>
GLM_FUNC_QUALIFIER vec<L, T, Q> fastLog(vec<L, T, Q> const& x)
{
return detail::functor1<vec, L, T, T, Q>::call(fastLog, x);
}

//fastExp2, ln2 = 0.69314718055994530941723212145818f
template<typename genType>
GLM_FUNC_QUALIFIER genType fastExp2(genType x)
Expand All @@ -127,10 +246,36 @@ namespace glm
{
return fastLog(x) / static_cast<genType>(0.69314718055994530941723212145818);
}

template<length_t L, typename T, qualifier Q>
GLM_FUNC_QUALIFIER vec<L, T, Q> fastLog2(vec<L, T, Q> const& x)
{
return detail::functor1<vec, L, T, T, Q>::call(fastLog2, x);
}

// fastLog
template<typename genType>
GLM_FUNC_QUALIFIER genType fastLog(genType x)
{
return std::log(x);
}

// Slower than the VC7.1 function...
template<>
GLM_FUNC_QUALIFIER float fastLog(float x)
{
return fastLog2<float>(x) * 0.69314718055994530941723212145818f;
}

template<>
GLM_FUNC_QUALIFIER double fastLog(double x)
{
return fastLog2<double>(x) * 0.69314718055994530941723212145818;
}

template<length_t L, typename T, qualifier Q>
GLM_FUNC_QUALIFIER vec<L, T, Q> fastLog(vec<L, T, Q> const& x)
{
return detail::functor1<vec, L, T, T, Q>::call(fastLog, x);
}
}//namespace glm