diff --git a/glm/gtx/fast_exponential.inl b/glm/gtx/fast_exponential.inl index 5b1174246..be1f1787b 100644 --- a/glm/gtx/fast_exponential.inl +++ b/glm/gtx/fast_exponential.inl @@ -2,12 +2,154 @@ namespace glm { - // fastPow: + template<> + GLM_FUNC_QUALIFIER float fastExp2(float x) + { + //approximated using "exp2(x)" formula + //for +x = exp2(x) + //for -x = 1/exp2(abs(x)) + float mx, out, C0, C1, C2, C3, C4, C5; + bool reciprocal; + uint32_t exponent, tmpx; + // coefficients interval [0, 1] + C0 = 0.00189646114543331e-00f; + C1 = 0.00894282898410912e-00f; + C2 = 0.05586624630452070e-00f; + C3 = 0.24013971109076948e-00f; + C4 = 0.69315475247516734e-00f; + C5 = 0.99999989311082671e-00f; + mx = x; + reciprocal = (*(uint32_t*)&x) & 0x80000000; + tmpx = (*(uint32_t*)&x) & 0x7FFFFFFF; + mx = *(float*)&tmpx; + exponent = (uint32_t)mx; + mx = mx - (float)exponent; + out = (((((C0 * mx + C1) * mx + C2) * mx + C3) * mx + C4) * mx + C5); + tmpx = (exponent + 127) << 23; + out = (*(float*)&tmpx) * out; + out = reciprocal ? (1.0f / out) : out; + return out; + } + + template<> + GLM_FUNC_QUALIFIER double fastExp2(double x) + { + //approximated using "exp2(x)" formula + //for +x = exp2(x) + //for -x = 1/exp2(abs(x)) + double mx, out, C0, C1, C2, C3, C4, C5; + bool reciprocal; + uint64_t exponent, tmpx; + //coefficients interval [0, 1] + C0 = 0.00189646114543331e-00; + C1 = 0.00894282898410912e-00; + C2 = 0.05586624630452070e-00; + C3 = 0.24013971109076948e-00; + C4 = 0.69315475247516734e-00; + C5 = 0.99999989311082671e-00; + mx = x; + reciprocal = (*(uint64_t*)&x) & 0x8000000000000000; + tmpx = (*(uint64_t*)&x) & 0x7FFFFFFFFFFFFFFF; + mx = *(double*)&tmpx; + exponent = (uint64_t)mx; + mx = mx - (double)exponent; + out = (((((C0 * mx + C1) * mx + C2) * mx + C3) * mx + C4) * mx + C5); + tmpx = (exponent + 1023) << 52; + out = (*(double*)&tmpx) * out; + out = reciprocal ? (1.0 / out) : out; + return out; + } + + template<> + GLM_FUNC_QUALIFIER float fastLog2(float x) + { + //approximated using this formula "log2(x)" + //interval [1, 1.5] + //for x mantissa <= 1.5 log2(x) ≈ C0 * x + C1 * x2... + //for x mantissa >= 1.5 log2(x) ≈ log2(x / 1.5) + log2(1.5) + //for x <= 1.0 = -log2(1/x) + float mx, l2, inv_three_half, lx, out, poly, C0, C1, C2, C3, C4, C5, C6; + bool low, low_mantissa; + uint32_t tmpx; + // coefficients + C0 = -0.067508561412635e-00f; + C1 = 0.605786644896372e-00f; + C2 = -2.351461115180550e-00f; + C3 = 5.175006419193522e-00f; + C4 = -7.182524695365589e-00f; + C5 = 7.064678543395325e-00f; + C6 = -3.243977190921664e-00f; + // constants + l2 = 0.5849625f; //log2(1.5) + inv_three_half = 0.6666667f; // 1.0 / 1.5 + mx = x; + low = mx < 1.0f; + mx = low ? (1.0f / mx) : mx; + tmpx = 1065353216 | ((*(uint32_t*)&mx) & 0x007FFFFF); + lx = *(float*)&tmpx; + low_mantissa = lx <= 1.5f; + lx = low_mantissa ? lx : (lx * inv_three_half); + poly = ((((((C0 * lx + C1) * lx + C2) * lx + C3) * lx + C4) * lx + C5) * lx + C6); + poly = low_mantissa ? poly : (poly + l2); + out = (((*(uint32_t*)&mx) >> 23) - 127) + poly; + out = low ? -out : out; + return out; + } + + template<> + GLM_FUNC_QUALIFIER double fastLog2(double x) + { + //approximated using this formula "log2(x)" + //interval [1, 1.5] + //for x mantissa <= 1.5 log2(x) ≈ C0 * x + C1 * x2... + //for x mantissa >= 1.5 log2(x) ≈ log2(x / 1.5) + log2(1.5) + //for x <= 1.0 = -log2(1/x) + double mx, l2, inv_three_half, lx, out, poly, C0, C1, C2, C3, C4, C5, C6; + bool low, low_mantissa; + uint64_t tmpx; + // coefficients + C0 = -0.067508561412635e-00; + C1 = 0.605786644896372e-00; + C2 = -2.351461115180550e-00; + C3 = 5.175006419193522e-00; + C4 = -7.182524695365589e-00; + C5 = 7.064678543395325e-00; + C6 = -3.243977190921664e-00; + // constants + l2 = 0.5849625; // log2(1.5) + inv_three_half = 0.6666667; // 1.0 / 1.5 + mx = x; + low = mx < 1.0; + mx = low ? (1.0 / mx) : mx; + tmpx = 4607182418800017408U | ((*(uint64_t*)&mx) & 0x000FFFFFFFFFFFFF); + lx = *(double*)&tmpx; + low_mantissa = lx <= 1.5; + lx = low_mantissa ? lx : (lx * inv_three_half); + poly = ((((((C0 * lx + C1) * lx + C2) * lx + C3) * lx + C4) * lx + C5) * lx + C6); + poly = low_mantissa ? poly : (poly + l2); + out = (((*(uint64_t*)&mx) >> 52) - 1023) + poly; + out = low ? -out : out; + return out; + } + + // fastPow template GLM_FUNC_QUALIFIER genType fastPow(genType x, genType y) { return exp(y * log(x)); } + + template<> + GLM_FUNC_QUALIFIER float fastPow(float x, float y) + { + return fastExp2(y * fastLog2(x)); + } + + template<> + GLM_FUNC_QUALIFIER double fastPow(double x, double y) + { + return fastExp2(y * fastLog2(x)); + } template GLM_FUNC_QUALIFIER vec fastPow(vec const& x, vec const& y) @@ -34,17 +176,16 @@ namespace glm } // fastExp - // Note: This function provides accurate results only for value between -1 and 1, else avoid it. - template - GLM_FUNC_QUALIFIER T fastExp(T x) + template<> + GLM_FUNC_QUALIFIER float fastExp(float x) { - // This has a better looking and same performance in release mode than the following code. However, in debug mode it's slower. - // return 1.0f + x * (1.0f + x * 0.5f * (1.0f + x * 0.3333333333f * (1.0f + x * 0.25 * (1.0f + x * 0.2f)))); - T x2 = x * x; - T x3 = x2 * x; - T x4 = x3 * x; - T x5 = x4 * x; - return T(1) + x + (x2 * T(0.5)) + (x3 * T(0.1666666667)) + (x4 * T(0.041666667)) + (x5 * T(0.008333333333)); + return fastExp2(1.4426950409f * x); + } + + template<> + GLM_FUNC_QUALIFIER double fastExp(double x) + { + return fastExp2(1.4426950409 * x); } /* // Try to handle all values of float... but often shower than std::exp, glm::floor and the loop kill the performance GLM_FUNC_QUALIFIER float fastExp(float x) @@ -86,28 +227,6 @@ namespace glm return detail::functor1::call(fastExp, x); } - // fastLog - template - GLM_FUNC_QUALIFIER genType fastLog(genType x) - { - return std::log(x); - } - - /* Slower than the VC7.1 function... - GLM_FUNC_QUALIFIER float fastLog(float x) - { - float y1 = (x - 1.0f) / (x + 1.0f); - float y2 = y1 * y1; - return 2.0f * y1 * (1.0f + y2 * (0.3333333333f + y2 * (0.2f + y2 * 0.1428571429f))); - } - */ - - template - GLM_FUNC_QUALIFIER vec fastLog(vec const& x) - { - return detail::functor1::call(fastLog, x); - } - //fastExp2, ln2 = 0.69314718055994530941723212145818f template GLM_FUNC_QUALIFIER genType fastExp2(genType x) @@ -127,10 +246,36 @@ namespace glm { return fastLog(x) / static_cast(0.69314718055994530941723212145818); } - + template GLM_FUNC_QUALIFIER vec fastLog2(vec const& x) { return detail::functor1::call(fastLog2, x); } + + // fastLog + template + GLM_FUNC_QUALIFIER genType fastLog(genType x) + { + return std::log(x); + } + + // Slower than the VC7.1 function... + template<> + GLM_FUNC_QUALIFIER float fastLog(float x) + { + return fastLog2(x) * 0.69314718055994530941723212145818f; + } + + template<> + GLM_FUNC_QUALIFIER double fastLog(double x) + { + return fastLog2(x) * 0.69314718055994530941723212145818; + } + + template + GLM_FUNC_QUALIFIER vec fastLog(vec const& x) + { + return detail::functor1::call(fastLog, x); + } }//namespace glm