使用MMX指令进行4X4矩阵乘法 [英] 4X4 matrix multiplication using MMX instructions
本文介绍了使用MMX指令进行4X4矩阵乘法的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!
问题描述
有关如何使用MMX指令进行矩阵乘法的任何帮助?
假设:short A[4][4] = {1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4} 乘以
short b[4][4] = {9,9,9,9,8,8,8,8,7,7,7,7,6,6,6,6}
Any help on how I can do matrix multiplication using MMX instructions?
Assuming: short A[4][4] = {1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4} multiplied by
short b[4][4] = {9,9,9,9,8,8,8,8,7,7,7,7,6,6,6,6}
推荐答案
可以吗?
Is it OK?
void mult4x4(short m1[4][4], short m2[4][4], short res[4][4])
{
__asm
{
push esi
push edi
mov eax, m1
mov esi, m2
mov edi, res
; cache mem. access into free mmx reg.
movq mm5, [esi + 24]
mov edx, 8
mov ecx, 4
__repeat:
movq mm7, [eax]
movq mm0, [esi]
movq mm2, [esi + edx]
movq mm1, mm0
movq mm6, mm7
punpckldq mm7, mm6
punpcklwd mm0, mm2
punpckhwd mm1, mm2
pmaddwd mm0, mm7
pmaddwd mm1, mm7
movq mm3, mm0
movq mm4, mm1
movq mm7, mm6
punpckhdq mm7, mm6
movq mm0, [esi + edx * 2]
movq mm2, mm5 ; read row from reg. cache
movq mm1, mm0
punpcklwd mm0, mm2
punpckhwd mm1, mm2
pmaddwd mm0, mm7
pmaddwd mm1, mm7
paddd mm3, mm0
paddd mm4, mm1
packssdw mm3, mm3
packssdw mm4, mm4
punpckldq mm3, mm4
movq [edi], mm3
add edi, edx
add eax, edx
loop __repeat
emms
pop edi
pop esi
}
}
// Demo driver: multiplies the two sample matrices from the question with
// mult4x4 and prints the 4x4 product, one row per line.
int main(void)
{
    // Each row of the operands holds a single repeated value.
    __declspec(align(16)) short m1[4][4] = {
        { 1, 1, 1, 1 },
        { 2, 2, 2, 2 },
        { 3, 3, 3, 3 },
        { 4, 4, 4, 4 }
    };
    __declspec(align(16)) short m2[4][4] = {
        { 9, 9, 9, 9 },
        { 8, 8, 8, 8 },
        { 7, 7, 7, 7 },
        { 6, 6, 6, 6 }
    };
    __declspec(align(16)) short result[4][4] = { 0 };

    mult4x4(m1, m2, result);

    // Every value is preceded by a tab; a blank line follows the last row.
    int r = 0;
    while (r < 4)
    {
        const short *row = result[r];
        for (int c = 0; c != 4; ++c)
            cout << "\t" << row[c];
        cout << endl;
        ++r;
    }
    cout << endl;

    return 0;
}
您可以使用 pmulhw 指令,有关矩阵计算的信息,请参见网络:
http://mathworld.wolfram.com/MatrixMultiplication.html
You can use the pmulhw instruction; for matrix computation, see on the web:
http://mathworld.wolfram.com/MatrixMultiplication.html
这篇关于使用MMX指令进行4X4矩阵乘法的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!
查看全文