Approximating cosine on [0,pi] using only single precision floating point


Problem Description


I'm currently working on an approximation of the cosine. Since the final target device is a self-developed one with a 32-bit floating-point ALU/LU and there is a specialized compiler for C, I am not able to use the C library math functions (cosf, ...). I'm aiming to code various methods that differ in terms of accuracy and number of instructions/cycles.

I've already tried a lot of different approximation algorithms: fdlibm, Taylor expansion, Padé approximation, the Remez algorithm using Maple, and so on.

But as soon as I implement them using only float precision, there is a significant loss of precision. And to be sure: I know that with double precision, a much higher precision is no problem at all...

Right now, I have some approximations which are exact up to a few thousand ulp around pi/2 (the range where the largest errors occur), and I feel that I am limited by the single-precision conversions.

To address the topic of argument reduction: the input is in radians. I assume that an argument reduction would cause even more precision loss due to divisions/multiplications. Since my overall input range is only 0..pi, I decided to reduce the argument to 0..pi/2.

Therefore my question is: Does anybody know a single-precision approximation to the cosine function with high accuracy (and, in the best case, high efficiency)? Are there any algorithms that optimize approximations for single precision? Do you know whether the built-in cosf function computes its values with single or double precision internally?

#include <stdint.h>

float ua_cos_v2(float x)
{
    float output;
    const float myPi = 3.1415927410125732421875f; /* pi rounded to binary32 */
    if (x < 0) x = -x;
    int quad = (int32_t)(x * 0.63661977236f); // quad = x/(pi/2) = x*2/pi
    if (x < 1.58f && x > 1.57f) // separate approximation around pi/2
    {
        /* polynomial in d = x - pi/2 (pi/2 rounded to binary32) */
        float d  = x - 1.57079637050628662109375f;
        float d2 = d * d;
        float d3 = d2 * d;
        float d4 = d2 * d2;
        float d7 = d4 * d3;
        output = -d - 2.0e-12f * d2 + 0.16666667163372039794921875f * d3
               + 2.0e-13f * d4 + 0.000198412701138295233249664306640625f * d7;
        output -= 4.37E-08f; /* account for the tail of pi/2 beyond binary32 */
    }
    else {
        float param_x = x; /* initialized so out-of-range quadrants are safe */
        int param_quad = -1;
        switch (quad)
        {
        case 0:
            param_x = x;
            break;
        case 1:
            param_x = myPi - x;
            param_quad = 1;
            break;
        case 2:
            param_x = x - myPi;
            param_quad = 0; /* cos(x) = -cos(x - pi): negate below */
            break;
        case 3:
            param_x = 2 * myPi - x;
            break;
        }
        const float c1 = 1.0f,
            c2 = -0.5f,
            c3 = 0.0416666679084300994873046875f,
            c4 = -0.001388888922519981861114501953125f,
            c5 = 0.00002480158218531869351863861083984375f,
            c6 = -2.75569362884198199026286602020263671875E-7f,
            c7 = 2.08583283978214240050874650478363037109375E-9f,
            c8 = -1.10807162057025010426514199934899806976318359375E-11f;
        float _x2 = param_x * param_x;
        output = c1 + _x2 * (c2 + _x2 * (c3 + _x2 * (c4 + _x2 * (c5
               + _x2 * (c6 + _x2 * (c7 + _x2 * c8))))));
        if (param_quad == 1 || param_quad == 0)
            output = -output;
    }
    return output;
}
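For illustration, a minimal test driver along these lines (assuming ua_cos_v2 above is in scope; not part of the original code) can spot-check the approximation against the double-precision library cos():

#include <math.h>
#include <stdio.h>

int main (void)
{
    const float xs[] = { 0.0f, 0.5f, 1.0f, 1.5707964f, 2.0f, 3.1415927f };
    for (int i = 0; i < 6; i++) {
        float ref = (float)cos ((double)xs[i]); /* double-precision reference */
        printf ("x=%.7f  approx=% .9f  ref=% .9f\n",
                xs[i], ua_cos_v2 (xs[i]), ref);
    }
    return 0;
}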


If I have forgotten any information, please do not hesitate to ask!

Thanks in advance

Solution

It is certainly possible to compute cosine on [0, π] with any desired error bound >= 0.5 ulp using just native-precision operations. However, the closer the target is to a correctly rounded function, the more up-front design work and runtime computation are required.

Implementations of transcendental functions typically consist of argument reduction, one or more core approximations, and a final fixup to counteract the argument reduction. In cases where the argument reduction involves subtraction, catastrophic cancellation needs to be avoided by explicitly or implicitly using higher precision. Implicit techniques can be designed to rely on native-precision computation only, for example by splitting a constant like π/2 into an unevaluated sum such as 1.57079637e+0f - 4.37113883e-8f when using IEEE-754 binary32 (single precision).
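As a minimal sketch of such an implicit technique (the helper name is illustrative, not from the answer's code), subtracting π/2 via its split representation keeps the reduction accurate near π/2, where the leading subtraction is exact by the Sterbenz lemma:

/* x - pi/2, with pi/2 represented as the unevaluated sum
   half_pi_hi + half_pi_lo; the leading subtraction is exact for x
   near pi/2, and the tail restores bits lost when rounding pi/2 */
float sub_half_pi (float x)
{
    const float half_pi_hi =  1.57079637e+0f; //  0x1.921fb6p+0
    const float half_pi_lo = -4.37113883e-8f; // -0x1.777a5cp-25
    return (x - half_pi_hi) - half_pi_lo;
}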

Achieving high accuracy with native-precision computation is a lot easier when the hardware provides a fused multiply-add (FMA) operation. The OP did not specify whether their target platform provides this operation, so I will first show a very simple approach offering moderate accuracy (maximum error < 5 ulps) that relies only on multiplies and adds. I am assuming hardware that adheres to the IEEE-754 standard, and that float is mapped to the IEEE-754 binary32 format.

The following is based on a blog post by Colin Wallace titled "Approximating sin(x) to 5 ULP with Chebyshev polynomials", which was no longer available online at the time of writing; a cached copy could still be retrieved via Google at the time. They propose to approximate sine on [-π, π] by a polynomial in x² of sin(x)/(x*(x²-π²)), then multiplying this by x*(x²-π²). A standard trick to compute a²-b² more accurately is to rewrite it as (a-b) * (a+b). Representing π as an unevaluated sum of two floating-point numbers pi_hi and pi_lo avoids catastrophic cancellation during subtraction, which turns the computation of x²-π² into ((x - pi_hi) - pi_lo) * ((x + pi_hi) + pi_lo).
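A sketch of that rewrite (the helper name is mine, for illustration): with π split into a high part and a tail, x²-π² can be evaluated without catastrophic cancellation near x = π:

/* x*x - pi*pi evaluated as (x - pi) * (x + pi), with pi split so
   that the difference retains accuracy near x == pi */
float xx_minus_pipi (float x)
{
    const float pi_hi =  3.14159274e+0f; //  0x1.921fb6p+1
    const float pi_lo = -8.74227766e-8f; // -0x1.777a5cp-24
    return ((x - pi_hi) - pi_lo) * ((x + pi_hi) + pi_lo);
}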

The polynomial core approximation should ideally be a minimax approximation, which minimizes the maximum error; I have used one here. Various standard tools like Maple or Mathematica can be used to generate it, or one can create one's own code based on the Remez algorithm.

For a cosine computation on [0, π] we can make use of the fact that cos(t) = sin(π/2 - t). Substituting x = π/2 - t into x * (x - π) * (x + π) yields (π/2 - t) * (3π/2 - t) * (-π/2 - t). The constants can be split into high and low parts (or head and tail, to use another common idiom) as before.

/* Approximate cosine on [0, PI] with maximum error of 4.704174 ulp */
float cosine (float x)
{
    const float half_pi_hi       =  1.57079637e+0f; //  0x1.921fb6p+0
    const float half_pi_lo       = -4.37113883e-8f; // -0x1.777a5cp-25
    const float three_half_pi_hi =  4.71238899e+0f; //  0x1.2d97c8p+2
    const float three_half_pi_lo = -1.19248806e-8f; // -0x1.99bc5cp-27
    float p, s, hpmx, thpmx, nhpmx;

    /* cos(x) = sin (pi/2 - x) = sin (hpmx) */
    hpmx = (half_pi_hi - x) + half_pi_lo;               // pi/2-x
    thpmx = (three_half_pi_hi - x) + three_half_pi_lo;  // 3*pi/2 - x
    nhpmx = (-half_pi_hi - x) - half_pi_lo;             // -pi/2 - x

    /* P(hpmx*hpmx) ~= sin (hpmx) / (hpmx * (hpmx * hpmx - pi * pi)) */
    s = hpmx * hpmx;
    p =         1.32729383e-10f;
    p = p * s - 2.33177868e-8f;
    p = p * s + 2.52223435e-6f;
    p = p * s - 1.73503853e-4f;
    p = p * s + 6.62087463e-3f;
    p = p * s - 1.01321176e-1f;
    return hpmx * nhpmx * thpmx * p;
}
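Error bounds like the 4.704174 ulp figure above can be checked by comparison with a double-precision reference. The following test sketch is my framing, not part of the original answer; a rigorous test would enumerate every binary32 value in [0, π] instead of sampling. It measures the error in ulps of the correctly rounded result:

#include <math.h>
#include <stdio.h>

float cosine (float x); /* the approximation above */

int main (void)
{
    double max_err = 0.0;
    for (long i = 0; i <= 10000000L; i++) {
        float x = (float)(3.141592653589793 * (double)i / 10000000.0);
        double ref = cos ((double)x);
        float rf = (float)ref;
        /* ulp of the correctly rounded single-precision result */
        double ulp = (double)(nextafterf (fabsf (rf), INFINITY) - fabsf (rf));
        double err = fabs ((double)cosine (x) - ref) / ulp;
        if (err > max_err) max_err = err;
    }
    printf ("max observed error: %.6f ulp\n", max_err);
    return 0;
}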

Below I am showing a classical approach which first reduces the argument to [-π/4, π/4] while recording the quadrant. The quadrant then tells us whether we need to compute a polynomial approximation to the sine or to the cosine on this primary approximation interval, and whether the sign of the final result needs to be flipped. This code assumes that the target platform supports the FMA operation specified by IEEE-754, and that it is mapped via the standard C function fmaf() for single precision.

The code is straightforward except for the float-to-int conversion with round-to-nearest-even that is used to compute the quadrant. It is performed by the "magic number addition" method and combined with the multiplication by 2/π (equivalent to a division by π/2). The maximum error is less than 1.5 ulps.
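In isolation, the magic-number trick looks as follows (a sketch assuming the default round-to-nearest-even mode and a compiler that performs strict IEEE-754 arithmetic without reassociating the expression):

/* round t to the nearest integer, ties to even, returned as a float;
   adding 1.5 * 2**23 pushes the integer part into the low significand
   bits, forcing the hardware to round, and the subtraction recovers
   the rounded value; valid for |t| < 2**22 */
float round_nearest_even (float t)
{
    const float magic = 12582912.0f; // 1.5 * 2**23
    return (t + magic) - magic;
}

In my_cosf below, the multiplication by 2/π is folded into the same step via fmaf.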

/* compute cosine on [0, PI] with maximum error of 1.429027 ulp */
float my_cosf (float a)
{
    const float half_pi_hi =  1.57079637e+0f; //  0x1.921fb6p+0
    const float half_pi_lo = -4.37113883e-8f; // -0x1.777a5cp-25
    float c, j, r, s, sa, t;
    int i;

    /* subtract closest multiple of pi/2 giving reduced argument and quadrant */
    j = fmaf (a, 6.36619747e-1f, 12582912.f) - 12582912.f; // 2/pi, 1.5 * 2**23
    a = fmaf (j, -half_pi_hi, a);
    a = fmaf (j, -half_pi_lo, a);

    /* phase shift of pi/2 (one quadrant) for cosine */
    i = (int)j;
    i = i + 1;

    sa = a * a;
    /* Approximate cosine on [-PI/4,+PI/4] with maximum error of 0.87444 ulp */
    c =               2.44677067e-5f;  //  0x1.9a8000p-16
    c = fmaf (c, sa, -1.38877297e-3f); // -0x1.6c0efap-10
    c = fmaf (c, sa,  4.16666567e-2f); //  0x1.555550p-5
    c = fmaf (c, sa, -5.00000000e-1f); // -0x1.000000p-1
    c = fmaf (c, sa,  1.00000000e+0f); //  0x1.000000p+0
    /* Approximate sine on [-PI/4,+PI/4] with maximum error of 0.64196 ulp */
    s =               2.86567956e-6f;  //  0x1.80a000p-19
    s = fmaf (s, sa, -1.98559923e-4f); // -0x1.a0690cp-13
    s = fmaf (s, sa,  8.33338592e-3f); //  0x1.111182p-7
    s = fmaf (s, sa, -1.66666672e-1f); // -0x1.555556p-3
    t = a * sa;
    s = fmaf (s, t, a);

    /* select sine approximation or cosine approximation based on quadrant */
    r = (i & 1) ? c : s;
    /* adjust sign based on quadrant */
    r = (i & 2) ? (0.0f - r) : r;

    return r;
}

As it turns out, in this particular case the use of FMA provides only a tiny benefit in terms of accuracy. If I replace each call fmaf(a,b,c) with ((a)*(b)+(c)), the maximum error increases only slightly, to 1.451367 ulps; that is, it stays below 1.5 ulps.
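If the code must stay portable across targets with and without hardware FMA, that substitution can be confined to a single macro; HAVE_FMA here is a hypothetical configuration flag, not something from the original answer:

#include <math.h>

/* hypothetical switch: define HAVE_FMA on targets with a hardware fused
   multiply-add; the fallback costs a little accuracy (1.451367 vs.
   1.429027 ulp maximum error, per the measurement above) */
#if defined(HAVE_FMA)
#define FMAF(a,b,c) fmaf ((a), (b), (c))
#else
#define FMAF(a,b,c) ((a) * (b) + (c))
#endif

The polynomial evaluation lines in my_cosf would then call FMAF instead of fmaf.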
