准确计算比例互补误差函数erfcx() [英] Accurate computation of scaled complementary error function, erfcx()

查看:267
本文介绍了准确计算比例互补误差函数erfcx()的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

通常记作 erfcx 的（指数缩放的）互补误差函数在数学上定义为 erfcx(x) := e^(x²) erfc(x)。它经常出现在物理学和化学的扩散问题中。虽然一些数学环境（如 MATLAB 和 GNU Octave）提供这个函数，但它不在 C 标准数学库中，后者只提供了 erf() 和 erfc()。



虽然可以直接基于数学定义来实现自己的 erfcx()，但这只能在有限的输入域中工作，因为在正半平面上，erfc() 对中等大小的参数会下溢，而 exp() 会溢出，正如例如这个问题中所指出的。

为了和 C 一起使用，可以改编一些 erfcx() 的开源实现，比如 Faddeeva 软件包中的那个，正如对这个问题的回答所指出的。但是，这些实现通常不会为给定的浮点格式提供完整的精度。例如，使用 2^32 个测试向量进行的测试显示，Faddeeva 软件包提供的 erfcx() 的最大误差在正半平面为 8.41 ulp，在负半平面为 511.68 ulp。

准确实现的合理误差界是 4 ulp，对应于 Intel 向量数学库 LA 配置中数学函数的精度界。如何才能只使用 C 标准数学库、不依赖外部库，准确地实现 erfcx() 以及相应的单精度版本 erfcxf()？我们可以假设 C 的 float 和 double 类型映射到 IEEE 754-2008 的 binary32 和 binary64 浮点类型。可以假设硬件支持融合乘加运算（FMA），因为目前所有主要的处理器架构都支持这种运算。

对于 erfcx()实现,目前为止我发现的最佳方法是基于以下文章:

M. M. Shepherd 和 J. G. Laframboise，"Chebyshev Approximation of (1 + 2x) exp(x²) erfc x in 0 ≤ x < ∞"，Mathematics of Computation，第 36 卷，第 153 期，1981 年 1 月，第 249-253 页（online）。本文提出了巧妙的变换，将经过缩放的互补误差函数映射到一个紧致有界的辅助函数，后者可以直接进行多项式逼近。为了性能，我尝试了这些变换的各种变体，但它们都对精度有负面影响。变换 (x - K)/(x + K) 中常数 K 的选择与核心近似的精度有着非显而易见的关系。我经验性地确定了"最优"值，这些值与论文中的不同。

将核心近似和中间结果的变换转换回 erfcx 结果会产生更多的舍入错误。为了减轻它们对准确性的影响,我们需要应用补偿步骤,这些步骤在我之前的 question&回答有关 erfcf 。 FMA的可用性大大简化了这一任务。

结果单精度代码如下所示:

<$ p $基于:MM Shepherd和JG Laframboise,
*(1 + 2x)exp(x ^ 2)erfc x的切比雪夫逼近0< code& = x *第153号,1981年1月,第249-253页。

$
float my_erfcxf(float x)
{
float a,d,e,m,p,q,r,s,t;

a = fmaxf(x,0.0f - x); //保留NaN绝对值计算

/ *准确计算q =(a-2)/(a + 2)。 [0,INF) - > [-1,1] * /
m = a - 2.0f;
p = a + 2.0f;
#if FAST_RCP_SSE
r = fast_recipf_sse(p);
#else
r = 1.0f / p;
#endif
q = m * r;
t = fmaf(q + 1.0f,-2.0f,a);
e = fmaf(q,-a,t);
q = fmaf(r,e,q);
$ b $ *在[-1,1] * / $ b中,近似(1 + 2 * a)* exp(a * a)* erfc(a) $ bp = 0x1.f10000p-15f; // 5.92470169e-5
p = fmaf(p,q,0x1.521cc6p-13f); // 1.61224554e-4
p = fmaf(p,q,-0x1.6b4ffep-12f); // -3.46481771e-4
p = fmaf(p,q,-0x1.6e2a7cp-10f); // -1.39681227e-3
p = fmaf(p,q,0x1.3c1d7ep-10f); // 1.20588380e-3
p = fmaf(p,q,0x1.1cc236p-07f); // 8.69014394e-3
p = fmaf(p,q,-0x1.069940p-07f); // -8.01387429e-3
p = fmaf(p,q,-0x1.bc1b6cp-05f); // -5.42122945e-2
p = fmaf(p,q,0x1.4ff8acp-03f); // 1.64048523e-1
p = fmaf(p,q,-0x1.54081ap-03f); // -1.66031078e-1
p = fmaf(p,q,-0x1.7bf5cep-04f); // -9.27637145e-2
p = fmaf(p,q,0x1.1ba03ap-02f); (1 + p)除以(1 + 2 * a)==>(2.76978403e-1

) exp(a * a)* erfc(a)* /
d = a + 0.5f;
#if FAST_RCP_SSE
r = fast_recipf_sse(d);
#else
r = 1.0f / d;
#endif
r = r * 0.5f;
q = fmaf(p,r,r); // q =(p + 1)/(1 + 2 * a)
t = q + q;
e =(p-q)+ fmaf(t,-a,1.0f); //剩余:(p + 1)-q *(1 + 2 * a)
r = fmaf(e,r,q);

if(a> 0x1.fffffep127f)r = 0.0f; // 3.40282347e + 38 //处理INF参数

处理负参数erfcx(x)= 2 * exp(x * x) - erfcx(| x |)* /
如果(x <0.0f){
s = x * x;
d = fmaf(x,x,-s);
e = expf(s);
r = e - r;
r = fmaf(e,d + d,r);
r = r + e;
if(e> 0x1.fffffep127f)r = e; // 3.40282347e + 38 //避免创建NaN
}
return r;



$ b $ p $这个实现在负半平面上的最大误差取决于标准数学库实现 expf()的准确性。使用英特尔编译器13.1.3.198,并使用 / fp:strict 进行编译时,我发现正半平面的最大误差为2.00450 ulps,负半径的最大误差为2.38412 ulps在一个详尽的测试半平面。最好我现在可以告诉, expf()的忠实圆整的实现将导致小于2.5 ulps的最大错误。



请注意,虽然代码包含两个可能较慢操作的分区,但它们以互补的特殊形式出现,因此可以在许多平台上使用快速倒数近似。只要倒数近似值可靠地取整,对实验的影响就可以忽略不计。即使是稍大的错误,例如在快速SSE版本中(最大错误<2.0 ulps),似乎也只有轻微的影响。

  / *快速倒数近似。 HW近似加牛顿迭代* / 
float fast_recipf_sse(float a)
{
__m128 t;
float e,r;
t = _mm_set_ss(a);
t = _mm_rcp_ss(t);
_mm_store_ss(& r,t);
e = fmaf(0.0f - a,r,1.0f);
r = fmaf(e,r,r);
return r;双精度版本 erfcx()
$ b $ / code>

双精度版本 erfcx() 在结构上与单精度版本 erfcxf() 相同，但需要使用包含更多项的极小极大多项式近似。这在优化核心近似时提出了挑战，因为当搜索空间非常大时，许多启发式算法会失效。下面的系数代表我迄今为止最好的解决方案，肯定还有改进的空间。使用 Intel 编译器和 /fp:strict 构建，并使用 2^32 个随机测试向量，观察到的最大误差在正半平面为 2.83788 ulp，在负半平面为 2.77856 ulp。

  double my_erfcx(double x)
{
double a,d,e,m,p,q,r,s,t;

a = fmax(x,0.0 - x); // NaN保存绝对值计算

/ *准确计算q =(a-4)/(a + 4)。 [0,INF) - > [-1,1] * /
m = a - 4.0;
p = a + 4.0;
r = 1.0 / p;
q = m * r;
t = fma(q + 1.0,-4.0,a);
e = fma(q,-a,t);
q = fma(r,e,q);
$ b $ *在[-1,1] * / $ b中,近似(1 + 2 * a)* exp(a * a)* erfc(a) $ bp = 0x1.edcad78fc8044p-31; // 8.9820305531190140e-10
p = fma(p,q,0x1.b1548f14735d1p-30); // 1.5764464777959401e-09
p = fma(p,q,-0x1.a1ad2e6c4a7a8p-27); // -1.2155985739342269e-08
p = fma(p,q,-0x1.1985b48f08574p-26); // -1.6386753783877791e-08
p = fma(p,q,0x1.c6a8093ac4f83p-24); // 1.0585794011876720e-07
p = fma(p,q,0x1.31c2b2b44b731p-24); // 7.1190423171700940e-08
p = fma(p,q,-0x1.b87373facb29fp-21); // -8.2040389712752056e-07
p = fma(p,q,0x1.3fef1358803b7p-22); // 2.9796165315625938e-07
p = fma(p,q,0x1.7eec072bb0be3p-18); // 5.7059822144459833e-06
p = fma(p,q,-0x1.78a680a741c4ap-17); // -1.1225056665965572e-05
p = fma(p,q,-0x1.9951f39295cf4p-16); // -2.4397380523258482e-05
p = fma(p,q,0x1.3be1255ce180bp-13); // 1.5062307184282616e-04
p = fma(p,q,-0x1.a1df71176b791p-13); // -1.9925728768782324e-04
p = fma(p,q,-0x1.8d4aaa0099bc8p-11); // -7.5777369791018515e-04
p = fma(p,q,0x1.49c673066c831p-8); // 5.0319701025945277e-03
p = fma(p,q,-0x1.0962386ea02b7p-6); // -1.6197733983519948e-02
p = fma(p,q,0x1.3079edf465cc3p-5); // 3.7167515521269866e-02
p = fma(p,q,-0x1.0fb06dfedc4ccp-4); // -6.6330365820039094e-02
p = fma(p,q,0x1.7fee004e266dfp-4); // 9.3732834999538536e-02
p = fma(p,q,-0x1.9ddb23c3e14d2p-4); // -1.0103906603588378e-01
p = fma(p,q,0x1.16ecefcfa4865p-4); // 6.8097054254651804e-02
p = fma(p,q,0x1.f7f5df66fc349p-7); // 1.5379652102610957e-02
p = fma(p,q,-0x1.1df1ad154a27fp-3); // -1.3962111684056208e-01
p = fma(p,q,0x1.dd2c8b74febf6p-3); (1 + p)乘以(1 + 2 * a)==>(2.3299511862555250e-01

) exp(a * a)* erfc(a)* /
d = a + 0.5;
r = 1.0 / d;
r = r * 0.5;
q = fma(p,r,r); // q =(p + 1)/(1 + 2 * a)
t = q + q;
e =(p-q)+ fma(t,-a,1.0); //残差:(p + 1)-q *(1 + 2 * a)
r = fma(e,r,q);
$ b $ *如果(a> 0x1.fffffffffffffp1023)r = 0.0,处理无穷* /
的参数;
$ b $ / *处理否定参数:erfcx(x)= 2 * exp(x * x) - erfcx(| x |)* /
if(x <0.0){
s = x * x;
d = fma(x,x,-s);
e = exp(s);
r = e - r;
r = fma(e,d + d,r);
r = r + e;
if(e> 0x1.fffffffffffffp1023)r = e; //避免创建NaN
}
return r;
}


The (exponentially) scaled complementary error function, commonly designated by erfcx, is defined mathematically as erfcx(x) := e^(x^2) · erfc(x). It frequently occurs in diffusion problems in physics as well as chemistry. While some mathematical environments, such as MATLAB and GNU Octave, provide this function, it is absent from the C standard math library, which only provides erf() and erfc().

While it is possible to implement one's own erfcx() based directly on the mathematical definition, this only works over a limited input domain, because in the positive half-plane erfc() underflows for arguments of moderate magnitude, while exp() overflows, as noted in this question, for example.

For use with C, one could adapt some erfcx() open-source implementations such as the one in the Faddeeva package, as pointed to in responses to this question. However, these implementations typically do not provide full accuracy for a given floating-point format. For example, tests with 2^32 test vectors show the maximum error of erfcx() as provided by the Faddeeva package to be 8.41 ulps in the positive half-plane and 511.68 ulps in the negative half-plane.

A reasonable bound for an accurate implementation would be 4 ulps, corresponding to the accuracy bound of the math functions in the LA profile of Intel's Vector Math library, which I have found to be a reasonable bound for non-trivial math function implementations that require both good accuracy and good performance.

How could erfcx(), and the corresponding single-precision version, erfcxf(), be implemented accurately, while using only the C standard math library, and requiring no external libraries? We can assume that C's float and double types are mapped to IEEE 754-2008 binary32 and binary64 floating-point types. Hardware support for the fused multiply-add operation (FMA) can be assumed, as this is supported by all major processor architectures at this time.

解决方案

The best approach for an erfcx() implementation I have found so far is based on the following paper:

M. M. Shepherd and J. G. Laframboise, "Chebyshev Approximation of (1 + 2 x) exp(x2) erfc x in 0 ≤ x < ∞." Mathematics of Computation, Volume 36, No. 153, January 1981, pp. 249-253 (online)

The paper proposes clever transformations that map the scaled complementary error function to a tightly bounded helper function that is amenable to straightforward polynomial approximation. I have experimented with variations of the transformations for the sake of performance, but all of these have had negative impact on accuracy. The choice of the constant K in the transformation (x - K) / (x + K) has a non-obvious relationship with the accuracy of the core approximation. I empirically determined "optimal" values, which differ from the paper.

The transformations for the arguments to the core approximation and the intermediate results back into erfcx results incur additional rounding errors. To mitigate their impact on accuracy, we need to apply compensation steps, which I outlined in some detail in my earlier question & answer regarding erfcf. The availability of FMA greatly simplifies this task.

The resulting single-precision code looks as follows:

/*  
 * Based on: M. M. Shepherd and J. G. Laframboise, "Chebyshev Approximation of 
 * (1+2x)exp(x^2)erfc x in 0 <= x < INF", Mathematics of Computation, Vol. 36,
 * No. 153, January 1981, pp. 249-253.
 *
 */  
float my_erfcxf (float x)
{
    float a, d, e, m, p, q, r, s, t;

    a = fmaxf (x, 0.0f - x); // NaN-preserving absolute value computation

    /* Compute q = (a-2)/(a+2) accurately. [0,INF) -> [-1,1] */
    m = a - 2.0f;
    p = a + 2.0f;
#if FAST_RCP_SSE
    r = fast_recipf_sse (p);
#else
    r = 1.0f / p;
#endif
    q = m * r;
    t = fmaf (q + 1.0f, -2.0f, a); 
    e = fmaf (q, -a, t); 
    q = fmaf (r, e, q); 

    /* Approximate (1+2*a)*exp(a*a)*erfc(a) as p(q)+1 for q in [-1,1] */
    p =              0x1.f10000p-15f;  //  5.92470169e-5
    p = fmaf (p, q,  0x1.521cc6p-13f); //  1.61224554e-4
    p = fmaf (p, q, -0x1.6b4ffep-12f); // -3.46481771e-4
    p = fmaf (p, q, -0x1.6e2a7cp-10f); // -1.39681227e-3
    p = fmaf (p, q,  0x1.3c1d7ep-10f); //  1.20588380e-3
    p = fmaf (p, q,  0x1.1cc236p-07f); //  8.69014394e-3
    p = fmaf (p, q, -0x1.069940p-07f); // -8.01387429e-3
    p = fmaf (p, q, -0x1.bc1b6cp-05f); // -5.42122945e-2
    p = fmaf (p, q,  0x1.4ff8acp-03f); //  1.64048523e-1
    p = fmaf (p, q, -0x1.54081ap-03f); // -1.66031078e-1
    p = fmaf (p, q, -0x1.7bf5cep-04f); // -9.27637145e-2
    p = fmaf (p, q,  0x1.1ba03ap-02f); //  2.76978403e-1

    /* Divide (1+p) by (1+2*a) ==> exp(a*a)*erfc(a) */
    d = a + 0.5f;
#if FAST_RCP_SSE
    r = fast_recipf_sse (d);
#else
    r = 1.0f / d;
#endif
    r = r * 0.5f;
    q = fmaf (p, r, r); // q = (p+1)/(1+2*a)
    t = q + q;
    e = (p - q) + fmaf (t, -a, 1.0f); // residual: (p+1)-q*(1+2*a)
    r = fmaf (e, r, q);

    if (a > 0x1.fffffep127f) r = 0.0f; // 3.40282347e+38 // handle INF argument

    /* Handle negative arguments: erfcx(x) = 2*exp(x*x) - erfcx(|x|) */
    if (x < 0.0f) {
        s = x * x;
        d = fmaf (x, x, -s);
        e = expf (s);
        r = e - r;
        r = fmaf (e, d + d, r); 
        r = r + e;
        if (e > 0x1.fffffep127f) r = e; // 3.40282347e+38 // avoid creating NaN
    }
    return r;
}

The maximum error of this implementation in the negative half-plane will depend on the accuracy of the standard math library's implementation of expf(). Using the Intel compiler, version 13.1.3.198, and compiling with /fp:strict I observed a maximum error of 2.00450 ulps in the positive half-plane and 2.38412 ulps in the negative half-plane in an exhaustive test. Best I can tell at this time, a faithfully-rounded implementation of expf() will result in a maximum error of less than 2.5 ulps.

Note that while the code contains two divisions, which are potentially slow operations, they occur in the special form of reciprocals, and are thus amenable to the use of fast reciprocal approximations on many platforms. As long as the reciprocal approximation is faithfully rounded, the impact on erfcxf() accuracy seems to be negligible, based on experiments. Even slightly larger errors, such as in the fast SSE version (with maximum error < 2.0 ulps) appear to have only a minor impact.

/* Fast reciprocal approximation. HW approximation plus Newton iteration */
float fast_recipf_sse (float a)
{
    __m128 t;
    float e, r;
    t = _mm_set_ss (a);
    t = _mm_rcp_ss (t);
    _mm_store_ss (&r, t);
    e = fmaf (0.0f - a, r, 1.0f);
    r = fmaf (e, r, r);
    return r;
}

Double-precision version erfcx() is structurally identical to the single-precision version erfcxf(), but requires a minimax polynomial approximation with many more terms. This presents a challenge when optimizing the core approximation, as many heuristics will break down when the search space is very large. The coefficients below represent my best solution to date, and there is definitely room for improvement. Building with the Intel compiler and /fp:strict, and using 2^32 random test vectors, the maximum error observed was 2.83788 ulps in the positive half-plane and 2.77856 ulps in the negative half-plane.

double my_erfcx (double x)
{
    double a, d, e, m, p, q, r, s, t;

    a = fmax (x, 0.0 - x); // NaN preserving absolute value computation

    /* Compute q = (a-4)/(a+4) accurately. [0,INF) -> [-1,1] */
    m = a - 4.0;
    p = a + 4.0;
    r = 1.0 / p;
    q = m * r;
    t = fma (q + 1.0, -4.0, a); 
    e = fma (q, -a, t); 
    q = fma (r, e, q); 

    /* Approximate (1+2*a)*exp(a*a)*erfc(a) as p(q)+1 for q in [-1,1] */
    p =             0x1.edcad78fc8044p-31;  //  8.9820305531190140e-10
    p = fma (p, q,  0x1.b1548f14735d1p-30); //  1.5764464777959401e-09
    p = fma (p, q, -0x1.a1ad2e6c4a7a8p-27); // -1.2155985739342269e-08
    p = fma (p, q, -0x1.1985b48f08574p-26); // -1.6386753783877791e-08
    p = fma (p, q,  0x1.c6a8093ac4f83p-24); //  1.0585794011876720e-07
    p = fma (p, q,  0x1.31c2b2b44b731p-24); //  7.1190423171700940e-08
    p = fma (p, q, -0x1.b87373facb29fp-21); // -8.2040389712752056e-07
    p = fma (p, q,  0x1.3fef1358803b7p-22); //  2.9796165315625938e-07
    p = fma (p, q,  0x1.7eec072bb0be3p-18); //  5.7059822144459833e-06
    p = fma (p, q, -0x1.78a680a741c4ap-17); // -1.1225056665965572e-05
    p = fma (p, q, -0x1.9951f39295cf4p-16); // -2.4397380523258482e-05
    p = fma (p, q,  0x1.3be1255ce180bp-13); //  1.5062307184282616e-04
    p = fma (p, q, -0x1.a1df71176b791p-13); // -1.9925728768782324e-04
    p = fma (p, q, -0x1.8d4aaa0099bc8p-11); // -7.5777369791018515e-04
    p = fma (p, q,  0x1.49c673066c831p-8);  //  5.0319701025945277e-03
    p = fma (p, q, -0x1.0962386ea02b7p-6);  // -1.6197733983519948e-02
    p = fma (p, q,  0x1.3079edf465cc3p-5);  //  3.7167515521269866e-02
    p = fma (p, q, -0x1.0fb06dfedc4ccp-4);  // -6.6330365820039094e-02
    p = fma (p, q,  0x1.7fee004e266dfp-4);  //  9.3732834999538536e-02
    p = fma (p, q, -0x1.9ddb23c3e14d2p-4);  // -1.0103906603588378e-01
    p = fma (p, q,  0x1.16ecefcfa4865p-4);  //  6.8097054254651804e-02
    p = fma (p, q,  0x1.f7f5df66fc349p-7);  //  1.5379652102610957e-02
    p = fma (p, q, -0x1.1df1ad154a27fp-3);  // -1.3962111684056208e-01
    p = fma (p, q,  0x1.dd2c8b74febf6p-3);  //  2.3299511862555250e-01

    /* Divide (1+p) by (1+2*a) ==> exp(a*a)*erfc(a) */
    d = a + 0.5;
    r = 1.0 / d;
    r = r * 0.5;
    q = fma (p, r, r); // q = (p+1)/(1+2*a)
    t = q + q;
    e = (p - q) + fma (t, -a, 1.0); // residual: (p+1)-q*(1+2*a)
    r = fma (e, r, q);

    /* Handle argument of infinity */
    if (a > 0x1.fffffffffffffp1023) r = 0.0;

    /* Handle negative arguments: erfcx(x) = 2*exp(x*x) - erfcx(|x|) */
    if (x < 0.0) {
        s = x * x;
        d = fma (x, x, -s);
        e = exp (s);
        r = e - r;
        r = fma (e, d + d, r); 
        r = r + e;
        if (e > 0x1.fffffffffffffp1023) r = e; // avoid creating NaN
    }
    return r;
}

这篇关于准确计算比例互补误差函数erfcx()的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆