更快的四元数矢量乘法不工作 [英] Faster quaternion vector multiplication doesn't work

查看:146
本文介绍了更快的四元数矢量乘法不工作的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我需要一个更快的四元数矢量乘法例程为我的数学库。现在我使用规范 v'= qv(q ^ -1),这产生与将向量乘以从四元数形成的矩阵相同的结果,因此我相信这是正确的。



到目前为止,我已经实现了3个替代的更快的方法:



#1,我不知道从哪里得到这个:

  v'=(q.xyz * 2 * dot (q.xyz,v))+(v *(qw * qw-dot(q.xyz,q.zyx)))+(cross(q.xyz,v)* qw * w) code> 

实施为:

  vec3 rotateVector(const quat& q,const vec3& v)
{
vec3 u(qx,qy,qz);
float s = q.w;

return vec3(u * 2.0f * vec3 :: dot(u,v))
+(v *(s * s - vec3 :: dot(u,u)))
+(vec3 :: cross(u,v)* s * 2.0f);
}

#2,由这个精细博客

  t = 2 * cross(q.xyz,v); 
v'= v + q.w * t + cross(q.xyz,t);

实施为:

  __ m128 rotateVector(__ m128 q,__m128 v)
{
__m128 temp = _mm_mul_ps(vec4 :: cross(q,v),_mm_set1_ps(2.0f));
return _mm_add_ps(
_mm_add_ps(v,_mm_mul_ps(_mm_shuffle_ps(q,q,_MM_SHUFFLE(3,3,3,3)),temp)),
vec4 :: cross temp));
}

并且#3来自许多来源,

  v'= v + 2.0 * cross(cross(v,q.xyz)+ qw * v,q.xyz); 

实施为:

  __ m128 rotateVector(__ m128 q,__m128 v)
{
// return v + 2.0 * cross(cross(v,q.xyz)+ qw * v,q.xyz) ;
return _mm_add_ps(v,
_mm_mul_ps(_mm_set1_ps(2.0f),
vec4 :: cross(
_mm_add_ps(
_mm_mul_ps(_mm_shuffle_ps(q,q,_MM_SHUFFLE 3,3,3)),v),
vec4 :: cross(v,q)),
q)
}

这3个都会产生不正确的结果。但我注意到一些有趣的模式。首先,#1和#2产生相同的结果。 #3产生相同的结果,如果所述矩阵被转置(我发现这是偶然的,以前我的quat到矩阵代码假定行主矩阵,这是不正确的) p>

我的四元数的数据存储定义为:

  union 
{
__m128 data;
struct {float x,y,z,w; };
float f [4];
};我的实现是否有缺陷,或者我在这里缺少什么?

div class =h2_lin>解决方案

主要问题,如果你想通过四元数旋转3d矢量,你需要计算所有9个旋转矩阵的标量。在你的例子中,旋转矩阵的计算是IMPLICIT。计算的顺序可能不是最优的。



如果从四元数和乘法向量生成3x3矩阵,则应该具有相同数量的算术运算(@ see code at bottom)。



我推荐什么。


  1. 尝试生成矩阵3x3并乘以您的向量,测量速度并与上一个比较。


  2. 分析显式解决方案,并尝试针对自定义架构进行优化。

    以实现替代四元数乘法,以及从等式q * v * q'导出乘法。


// === =========
替代放大伪码

  / ** 
四元数乘法,
可以加速某些系统的乘法(例如PDA)
http://mathforum.org/library/drmath/view/51464.html
http://www.lboro .ac.uk / departments / ma / gallery / quat / src / quat.ps
在url的许多bug提供的代码,必须重写。
* /
inline xxquaternion mul_alt(const xxquaternion& q)const {
float t0 =(x-y)*(q.y-q.x);
float t1 =(w + z)*(q.w + q.z);
float t2 =(w-z)*(q.y + q.x);
float t3 =(x + y)*(q.w-q.z);
float t4 =(x-z)*(q.z-q.y);
float t5 =(x + z)*(q.z + q.y);
float t6 =(w + y)*(q.w-q.x);
float t7 =(w-y)*(q.w + q.x);

float t8 = t5 + t6 + t7
float t9 =(t4 + t8)* 0.5;
return xxquaternion(t3 + t9-t6,
t2 + t9-t7,
t1 + t9-t8,
t0 + t9-t5);
// 9乘法27 addidtions 8变量
//但是我们可以清除4个变量
/ *
float r = w,i = z,j = y,k = x;
float br = q.w,bi = q.z,bj = q.y,bk = q.x;
float t0 =(k-j)*(bj-bk);
float t1 =(r + i)*(br + bi);
float t2 =(r-i)*(bj + bk);
float t3 =(k + j)*(br-bi);
float t4 =(k-i)*(bi-bj);
float t5 =(k + i)*(bi + bj);
float t6 =(r + j)*(br-bk);
float t7 =(r-j)*(br + bk);
float t8 = t5 + t6 + t7;
float t9 =(t4 + t8)* 0.5;
float rr = t0 + t9-t5;
float ri = t1 + t9-t8;
float rj = t2 + t9-t7;
float rk = t3 + t9-t6;
return xxquaternion(rk,rj,ri,rr);
* /
}

// ======== ====
显式向量旋转变体

  / ** 
四舍五入向量旋转向量
* /
inline vector3 rotate(const vector3& v)const {
xxquaternion q(vx * w + vz * y - vy * z,
vy * w + vx * z - vz * x,
vz * w + vy * x-vx * y,
vx * x + vy * y + vz * z)

return vector3(w * qx + x * qw + y * qz - z * qy,
w * qy + y * qw + z * qx - x * qz,
w * qz + z * qw + x * qy-y * qx)*(1.0f / norm

// 29个乘法,20个addidtions,4个变量
// 5
/ *
//实现
xxquaternion r =(* this) * xxquaternion(vx,vy,vz,0)* this-> inverted();
return vector3(r.x,r.y,r.z);
* /

/ *
//替代实现
float wx,wy,wz,xx,yy,yz,xy,xz,zz,x2,y2 ,z2;
x2 = q.x + q.x; y2 = q.y + q.y; z2 = q.z + q.z;

xx = q.x * x2; xy = q.x * y2; xz = q.x * z2
yy = q.y * y2; yz = q.y * z2; zz = q.z * z2;
wx = q.w * x2; wy = q.w * y2; wz = q.w * z2;

return vector3(vx-vx *(yy + zz)+ vy *(xy-wz)+ vz *(xz + wy),
vy + vx * -vy *(xz + wx)-vz *(xx + yy))*(1.0) - vy *(xx + zz)+ vz *(yz-wx),
vz + vx * f / norm());
// 18个乘法,21个addidtions,12个变量
* /
};

祝你好运。


I need a faster quaternion-vector multiplication routine for my math library. Right now I'm using the canonical v' = qv(q^-1), which produces the same result as multiplying the vector by a matrix made from the quaternion, so I'm confident in it's correctness.

So far I've implemented 3 alternative "faster" methods:

#1, I have no idea where I got this one from:

v' = (q.xyz * 2 * dot(q.xyz, v)) + (v * (q.w*q.w - dot(q.xyz, q.zyx))) + (cross(q.xyz, v) * q.w * w)

Implemented as:

vec3 rotateVector(const quat& q, const vec3& v)
{
    vec3 u(q.x, q.y, q.z);
    float s = q.w;

    return vec3(u * 2.0f * vec3::dot(u, v))
    + (v * (s*s - vec3::dot(u, u)))
    + (vec3::cross(u, v) * s * 2.0f);
}

#2, courtesy of this fine blog

t = 2 * cross(q.xyz, v);
v' = v + q.w * t + cross(q.xyz, t);

Implemented as:

__m128 rotateVector(__m128 q, __m128 v)
{
    __m128 temp = _mm_mul_ps(vec4::cross(q, v), _mm_set1_ps(2.0f));
    return _mm_add_ps(
        _mm_add_ps(v, _mm_mul_ps(_mm_shuffle_ps(q, q, _MM_SHUFFLE(3, 3, 3, 3)), temp)),
        vec4::cross(q, temp));
}

And #3, from numerous sources,

v' = v + 2.0 * cross(cross(v, q.xyz) + q.w * v, q.xyz);

implemented as:

__m128 rotateVector(__m128 q, __m128 v)
{
    //return v + 2.0 * cross(cross(v, q.xyz) + q.w * v, q.xyz);
    return _mm_add_ps(v,
        _mm_mul_ps(_mm_set1_ps(2.0f),
            vec4::cross(
                _mm_add_ps(
                    _mm_mul_ps(_mm_shuffle_ps(q, q, _MM_SHUFFLE(3, 3, 3, 3)), v),
                    vec4::cross(v, q)),
                q)));
}

All 3 of these produce incorrect results. I have, however, noticed some interesting patterns. First of all, #1 and #2 produce the same result. #3 produces the same result that I get from multiplying the vector by a derived matrix if said matrix is transposed (I discovered this by accident, previously my quat-to-matrix code assumed row-major matrices, which was incorrect).

The data storage of my quaternions are defined as:

union
{
    __m128 data;
    struct { float x, y, z, w; };
    float f[4];
};

Are my implementations flawed, or am I missing something here?

解决方案

Main issue, if you want to rotate the 3d vector by quaternion, you require to calculate all 9 scalars of rotation matrix. In your examples, calculation of rotation matrix is IMPLICIT. The order of calculation can be not optimal.

If you generate 3x3 matrix from quaternion and multiply vector, you should have same number of arithmetic operations (@see code at bottom).

What i recommend.

  1. Try to generate matrix 3x3 and multiply your vector, measure the speed and compare with previous.

  2. Analyze the explicit solution, and try to optimize for custom architecture.

  3. try to implement alternative quaternion multiplication, and derived multiplication from equation q*v*q'.

//============ alternative multiplication pseudocode

 /**
      alternative way of quaternion multiplication,
      can speedup multiplication for some systems (PDA for example)
      http://mathforum.org/library/drmath/view/51464.html
      http://www.lboro.ac.uk/departments/ma/gallery/quat/src/quat.ps
      in provided code by url's many bugs, have to be rewriten.
    */
    inline xxquaternion mul_alt( const xxquaternion& q) const {
        float t0 = (x-y)*(q.y-q.x);
        float t1 = (w+z)*(q.w+q.z);
        float t2 = (w-z)*(q.y+q.x);
        float t3 = (x+y)*(q.w-q.z);
        float t4 = (x-z)*(q.z-q.y);
        float t5 = (x+z)*(q.z+q.y);
        float t6 = (w+y)*(q.w-q.x);
        float t7 = (w-y)*(q.w+q.x);

        float t8 = t5 + t6 + t7;
        float t9 = (t4 + t8)*0.5;
        return xxquaternion ( t3+t9-t6,
                              t2+t9-t7,
                              t1+t9-t8,
                              t0+t9-t5 );
        // 9 multiplications    27  addidtions    8 variables
        // but of couse we can clean 4 variables
/*
        float r = w, i = z, j = y, k =x;
        float br = q.w,  bi = q.z, bj = q.y, bk =q.x;
        float t0 = (k-j)*(bj-bk);
        float t1 = (r+i)*(br+bi);
        float t2 = (r-i)*(bj+bk);
        float t3 = (k+j)*(br-bi);
        float t4 = (k-i)*(bi-bj);
        float t5 = (k+i)*(bi+bj);
        float t6 = (r+j)*(br-bk);
        float t7 = (r-j)*(br+bk);
        float t8 = t5 + t6 + t7;
        float t9 = (t4 + t8)*0.5;
        float rr = t0+t9-t5;
        float ri = t1+t9-t8;
        float rj = t2+t9-t7;
        float rk = t3+t9-t6;
        return xxquaternion ( rk, rj, ri, rr );
*/
    }

//============ explicit vector rotation variants

/**
      rotate vector by quaternion
    */
    inline vector3 rotate(const vector3& v)const{
        xxquaternion q(v.x * w + v.z * y - v.y * z,
                       v.y * w + v.x * z - v.z * x,
                       v.z * w + v.y * x - v.x * y,
                       v.x * x + v.y * y + v.z * z);

        return vector3(w * q.x + x * q.w + y * q.z - z * q.y,
                       w * q.y + y * q.w + z * q.x - x * q.z,
                       w * q.z + z * q.w + x * q.y - y * q.x)*( 1.0f/norm() );

        // 29  multiplications, 20 addidtions, 4 variables
        // 5 
       /*
        // refrence implementation  
        xxquaternion r = (*this)*xxquaternion(v.x, v.y, v.z, 0)*this->inverted();
        return vector3( r.x, r.y, r.z ); 
       */

       /*
        // alternative implementation
        float wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2;
        x2 = q.x + q.x; y2 = q.y + q.y; z2 = q.z + q.z;

        xx = q.x * x2;   xy = q.x * y2;   xz = q.x * z2;
        yy = q.y * y2;   yz = q.y * z2;   zz = q.z * z2;
        wx = q.w * x2;   wy = q.w * y2;   wz = q.w * z2;

        return vector3( v.x  - v.x * (yy + zz) + v.y * (xy - wz) + v.z * (xz + wy),
                        v.y  + v.x * (xy + wz) - v.y * (xx + zz) + v.z * (yz - wx),
                        v.z  + v.x * (xz - wy) + v.y * (yz + wx) - v.z * (xx + yy) )*( 1.0f/norm() );
        // 18 multiplications, 21 addidtions, 12 variables
       */
    };

Good luck.

这篇关于更快的四元数矢量乘法不工作的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆