如何在CUDA下改善此功能? [英] How can I improve this function under CUDA?

查看:101
本文介绍了如何在CUDA下改善此功能?的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我可以在CUDA下改进以下功能吗?

Can I improve the following function under CUDA?

该功能的作用是

给出一个minmaxELM1ELM,检查在数组min到max的任何行中是否找到三个数组ans[6]. >,D2D3D4D5D6,如果找到则返回1.

Given a min and max, ELM1 and ELM, check if any three numbers of array ans[6] are found in any row, from min to max, in array D1, D2, D3, D4, D5, D6, if found return 1.

我尝试使用loopsOR -ing,AND -ing,将goto替换为标志等.但这似乎是最快的方法.

I tried using loops, OR-ing, AND-ing, replacing goto with flag etc. etc. But this seems to be the fastest way.

 __device__ bool THREEA(unsigned int n0, unsigned int n,unsigned int* ST1,unsigned int* D1, unsigned int* D2,unsigned int* D3,unsigned int* D4,unsigned int* D5,unsigned int* D6,unsigned int* ans)
{
     unsigned int ELM, ELM1,flag;
     ELM = ST1[n0]+n;  //local.37

     ELM1 = n;       //local.33
     while (ELM1 < ELM)
     {

         flag = 0;
         if (D1[ELM1] == ans[0])
         {
          flag++;
         }
         if (D2[ELM1] == ans[0])
         {
          flag++;
         }
         if (D3[ELM1] == ans[0])
         {
          flag++;
         }
         if (D4[ELM1] == ans[0])
         {
          flag++;
         }
         if (D5[ELM1] == ans[0])
         {
          flag++;
         }
         if (D6[ELM1] == ans[0])
         {
          flag++;
         }
         if (flag != 1)
          goto onethreefour;
         if (D1[ELM1] == ans[1])
         {
          flag++;
         }
         if (D2[ELM1] == ans[1])
         {
          flag++;
         }
         if (D3[ELM1] == ans[1])
         {
          flag++;
         }
         if (D4[ELM1] == ans[1])
         {
          flag++;
         }
         if (D5[ELM1] == ans[1])
         {
          flag++;
         }
         if (D6[ELM1] == ans[1])
         {
          flag++;
         }
         if (flag != 2)
          goto onethreefour;
         if (D1[ELM1] == ans[2])
         {
          return 1;
         }
         if (D2[ELM1] == ans[2])
         {
          return 1;
         }
         if (D3[ELM1] == ans[2])
         {
          return 1;
         }
         if (D4[ELM1] == ans[2])
         {
          return 1;
         }
         if (D5[ELM1] == ans[2])
         {
          return 1;
         }
         if (D6[ELM1] == ans[2])
         {
          return 1;
         }
         if (D1[ELM1] == ans[3])
         {
          return 1;
         }
         if (D2[ELM1] == ans[3])
         {
          return 1;
         }
         if (D3[ELM1] == ans[3])
         {
          return 1;
         }
         if (D4[ELM1] == ans[3])
         {
          return 1;
         }
         if (D5[ELM1] == ans[3])
         {
          return 1;
         }
         if (D6[ELM1] == ans[3])
         {
          return 1;
         }
         if (D1[ELM1] == ans[4])
         {
          return 1;
         }
         if (D2[ELM1] == ans[4])
         {
          return 1;
         }
         if (D3[ELM1] == ans[4])
         {
          return 1;
         }
         if (D4[ELM1] == ans[4])
         {
          return 1;
         }
         if (D5[ELM1] == ans[4])
         {
          return 1;
         }
         if (D6[ELM1] == ans[4])
         {
          return 1;
         }
         if (D1[ELM1] == ans[5])
         {
          return 1;
         }
         if (D2[ELM1] == ans[5])
         {
          return 1;
         }
         if (D3[ELM1] == ans[5])
         {
          return 1;
         }
         if (D4[ELM1] == ans[5])
         {
          return 1;
         }
         if (D5[ELM1] == ans[5])
         {
          return 1;
         }
         if (D6[ELM1] == ans[5])
         {
          return 1;
         }

onethreefour:
         flag = 0;
         if (D1[ELM1] == ans[0])
         {
          flag++;
         }
         if (D2[ELM1] == ans[0])
         {
          flag++;
         }
         if (D3[ELM1] == ans[0])
         {
          flag++;
         }
         if (D4[ELM1] == ans[0])
         {
          flag++;
         }
         if (D5[ELM1] == ans[0])
         {
          flag++;
         }
         if (D6[ELM1] == ans[0])
         {
          flag++;
         }
         if (flag != 1)
          goto onefourfive;
         if (D1[ELM1] == ans[2])
         {
          flag++;
         }
         if (D2[ELM1] == ans[2])
         {
          flag++;
         }
         if (D3[ELM1] == ans[2])
         {
          flag++;
         }
         if (D4[ELM1] == ans[2])
         {
          flag++;
         }
         if (D5[ELM1] == ans[2])
         {
          flag++;
         }
         if (D6[ELM1] == ans[2])
         {
          flag++;
         }
         if (flag != 2)
          goto onefourfive;
         if (D1[ELM1] == ans[3])
         {
          return 1;
         }
         if (D2[ELM1] == ans[3])
         {
          return 1;
         }
         if (D3[ELM1] == ans[3])
         {
          return 1;
         }
         if (D4[ELM1] == ans[3])
         {
          return 1;
         }
         if (D5[ELM1] == ans[3])
         {
          return 1;
         }
         if (D6[ELM1] == ans[3])
         {
          return 1;
         }
         if (D1[ELM1] == ans[4])
         {
          return 1;
         }
         if (D2[ELM1] == ans[4])
         {
          return 1;
         }
         if (D3[ELM1] == ans[4])
         {
          return 1;
         }
         if (D4[ELM1] == ans[4])
         {
          return 1;
         }
         if (D5[ELM1] == ans[4])
         {
          return 1;
         }
         if (D6[ELM1] == ans[4])
         {
          return 1;
         }
         if (D1[ELM1] == ans[5])
         {
          return 1;
         }
         if (D2[ELM1] == ans[5])
         {
          return 1;
         }
         if (D3[ELM1] == ans[5])
         {
          return 1;
         }
         if (D4[ELM1] == ans[5])
         {
          return 1;
         }
         if (D5[ELM1] == ans[5])
         {
          return 1;
         }
         if (D6[ELM1] == ans[5])
         {
          return 1;
         }


onefourfive:
         flag = 0;
         if (D1[ELM1] == ans[0])
         {
          flag++;
         }
         if (D2[ELM1] == ans[0])
         {
          flag++;
         }
         if (D3[ELM1] == ans[0])
         {
          flag++;
         }
         if (D4[ELM1] == ans[0])
         {
          flag++;
         }
         if (D5[ELM1] == ans[0])
         {
          flag++;
         }
         if (D6[ELM1] == ans[0])
         {
          flag++;
         }
         if (flag != 1)
          goto onefivesix;
         if (D1[ELM1] == ans[3])
         {
          flag++;
         }
         if (D2[ELM1] == ans[3])
         {
          flag++;
         }
         if (D3[ELM1] == ans[3])
         {
          flag++;
         }
         if (D4[ELM1] == ans[3])
         {
          flag++;
         }
         if (D5[ELM1] == ans[3])
         {
          flag++;
         }
         if (D6[ELM1] == ans[3])
         {
          flag++;
         }
         if (flag != 2)
          goto onefivesix;
         if (D1[ELM1] == ans[4])
         {
          return 1;
         }
         if (D2[ELM1] == ans[4])
         {
          return 1;
         }
         if (D3[ELM1] == ans[4])
         {
          return 1;
         }
         if (D4[ELM1] == ans[4])
         {
          return 1;
         }
         if (D5[ELM1] == ans[4])
         {
          return 1;
         }
         if (D6[ELM1] == ans[4])
         {
          return 1;
         }
         if (D1[ELM1] == ans[5])
         {
          return 1;
         }
         if (D2[ELM1] == ans[5])
         {
         return 1;
         }
         if (D3[ELM1] == ans[5])
         {
          return 1;
         }
         if (D4[ELM1] == ans[5])
         {
          return 1;
         }
         if (D5[ELM1] == ans[5])
         {
         return 1;
         }
         if (D6[ELM1] == ans[5])
         {
          return 1;
         }

onefivesix:
         flag = 0;
         if (D1[ELM1] == ans[0])
         {
          flag++;
         }
         if (D2[ELM1] == ans[0])
         {
          flag++;
         }
         if (D3[ELM1] == ans[0])
         {
          flag++;
         }
         if (D4[ELM1] == ans[0])
         {
          flag++;
         }
         if (D5[ELM1] == ans[0])
         {
          flag++;
         }
         if (D6[ELM1] == ans[0])
         {
          flag++;
         }
         if (flag != 1)
          goto twothreefour;
         if (D1[ELM1] == ans[4])
         {
          flag++;
         }
         if (D2[ELM1] == ans[4])
         {
          flag++;
         }
         if (D3[ELM1] == ans[4])
         {
          flag++;
         }
         if (D4[ELM1] == ans[4])
         {
          flag++;
         }
         if (D5[ELM1] == ans[4])
         {
          flag++;
         }
         if (D6[ELM1] == ans[4])
         {
          flag++;
         }
         if (flag != 2)
          goto twothreefour;
         if (D1[ELM1] == ans[5])
         {
          return 1;
         }
         if (D2[ELM1] == ans[5])
         {
          return 1;
         }
         if (D3[ELM1] == ans[5])
         {
          return 1;
         }
         if (D4[ELM1] == ans[5])
         {
          return 1;
         }
         if (D5[ELM1] == ans[5])
         {
          return 1;
         }
         if (D6[ELM1] == ans[5])
         {
          return 1;
         }
twothreefour:
         flag = 0;
         if (D1[ELM1] == ans[1])
         {
          flag++;
         }
         if (D2[ELM1] == ans[1])
         {
          flag++;
         }
         if (D3[ELM1] == ans[1])
         {
          flag++;
         }
         if (D4[ELM1] == ans[1])
         {
          flag++;
         }
         if (D5[ELM1] == ans[1])
         {
          flag++;
         }
         if (D6[ELM1] == ans[1])
         {
          flag++;
         }
         if (flag != 1)
          goto twofourfive;
         if (D1[ELM1] == ans[2])
         {
          flag++;
         }
         if (D2[ELM1] == ans[2])
         {
          flag++;
         }
         if (D3[ELM1] == ans[2])
         {
          flag++;
         }
         if (D4[ELM1] == ans[2])
         {
          flag++;
         }
         if (D5[ELM1] == ans[2])
         {
          flag++;
         }
         if (D6[ELM1] == ans[2])
         {
          flag++;
         }
         if (flag != 2)
          goto twofourfive;
         if (D1[ELM1] == ans[3])
         {
          return 1;
         }
         if (D2[ELM1] == ans[3])
         {
          return 1;
         }
         if (D3[ELM1] == ans[3])
         {
          return 1;
         }
         if (D4[ELM1] == ans[3])
         {
          return 1;
         }
         if (D5[ELM1] == ans[3])
         {
          return 1;
         }
         if (D6[ELM1] == ans[3])
         {
          return 1;
         }
         if (D1[ELM1] == ans[4])
         {
          return 1;
         }
         if (D2[ELM1] == ans[4])
         {
          return 1;
         }
         if (D3[ELM1] == ans[4])
         {
          return 1;
         }
         if (D4[ELM1] == ans[4])
         {
          return 1;
         }
         if (D5[ELM1] == ans[4])
         {
          return 1;
         }
         if (D6[ELM1] == ans[4])
         {
          return 1;
         }
         if (D1[ELM1] == ans[5])
         {
          return 1;
         }
         if (D2[ELM1] == ans[5])
         {
          return 1;
         }
         if (D3[ELM1] == ans[5])
         {
          return 1;
         }
         if (D4[ELM1] == ans[5])
         {
          return 1;
         }
         if (D5[ELM1] == ans[5])
         {
          return 1;
         }
         if (D6[ELM1] == ans[5])
         {
          return 1;
         }
twofourfive:
         flag = 0;
         if (D1[ELM1] == ans[1])
         {
          flag++;
         }
         if (D2[ELM1] == ans[1])
         {
          flag++;
         }
         if (D3[ELM1] == ans[1])
         {
          flag++;
         }
         if (D4[ELM1] == ans[1])
         {
          flag++;
         }
         if (D5[ELM1] == ans[1])
         {
          flag++;
         }
         if (D6[ELM1] == ans[1])
         {
          flag++;
         }
         if (flag != 1)
          goto twofivesix;
         if (D1[ELM1] == ans[3])
         {
          flag++;
         }
         if (D2[ELM1] == ans[3])
         {
          flag++;
         }
         if (D3[ELM1] == ans[3])
         {
          flag++;
         }
         if (D4[ELM1] == ans[3])
         {
          flag++;
         }
         if (D5[ELM1] == ans[3])
         {
          flag++;
         }
         if (D6[ELM1] == ans[3])
         {
          flag++;
         }
         if (flag != 2)
          goto twofivesix;
         if (D1[ELM1] == ans[4])
         {
          return 1;
         }
         if (D2[ELM1] == ans[4])
         {
          return 1;
         }
         if (D3[ELM1] == ans[4])
         {
          return 1;
         }
         if (D4[ELM1] == ans[4])
         {
          return 1;
         }
         if (D5[ELM1] == ans[4])
         {
          return 1;
         }
         if (D6[ELM1] == ans[4])
         {
          return 1;
         }
         if (D1[ELM1] == ans[5])
         {
          return 1;
         }
         if (D2[ELM1] == ans[5])
         {
          return 1;
         }
         if (D3[ELM1] == ans[5])
         {
          return 1;
         }
         if (D4[ELM1] == ans[5])
         {
          return 1;
         }
         if (D5[ELM1] == ans[5])
         {
          return 1;
         }
         if (D6[ELM1] == ans[5])
         {
          return 1;
         }
twofivesix:
         flag = 0;
         if (D1[ELM1] == ans[1])
         {
          flag++;
         }
         if (D2[ELM1] == ans[1])
         {
          flag++;
         }
         if (D3[ELM1] == ans[1])
         {
          flag++;
         }
         if (D4[ELM1] == ans[1])
         {
          flag++;
         }
         if (D5[ELM1] == ans[1])
         {
          flag++;
         }
         if (D6[ELM1] == ans[1])
         {
          flag++;
         }
         if (flag != 1)
          goto threefourfive;
         if (D1[ELM1] == ans[4])
         {
          flag++;
         }
         if (D2[ELM1] == ans[4])
         {
          flag++;
         }
         if (D3[ELM1] == ans[4])
         {
          flag++;
         }
         if (D4[ELM1] == ans[4])
         {
          flag++;
         }
         if (D5[ELM1] == ans[4])
         {
          flag++;
         }
         if (D6[ELM1] == ans[4])
         {
          flag++;
         }
         if (flag != 2)
          goto threefourfive;
         if (D1[ELM1] == ans[5])
         {
          return 1;
         }
         if (D2[ELM1] == ans[5])
         {
          return 1;
         }
         if (D3[ELM1] == ans[5])
         {
          return 1;
         }
         if (D4[ELM1] == ans[5])
         {
          return 1;
         }
         if (D5[ELM1] == ans[5])
         {
          return 1;
         }
         if (D6[ELM1] == ans[5])
         {
          return 1;
         } 
threefourfive:
         flag = 0;
         if (D1[ELM1] == ans[2])
         {
          flag++;
         }
         if (D2[ELM1] == ans[2])
         {
          flag++;
         }
         if (D3[ELM1] == ans[2])
         {
          flag++;
         }
         if (D4[ELM1] == ans[2])
         {
          flag++;
         }
         if (D5[ELM1] == ans[2])
         {
          flag++;
         }
         if (D6[ELM1] == ans[2])
         {
          flag++;
         }
         if (flag != 1)
          goto threefivesix;
         if (D1[ELM1] == ans[3])
         {
          flag++;
         }
         if (D2[ELM1] == ans[3])
         {
          flag++;
         }
         if (D3[ELM1] == ans[3])
         {
          flag++;
         }
         if (D4[ELM1] == ans[3])
         {
          flag++;
         }
         if (D5[ELM1] == ans[3])
         {
          flag++;
         }
         if (D6[ELM1] == ans[3])
         {
          flag++;
         }
         if (flag != 2)
          goto threefivesix;
         if (D1[ELM1] == ans[4])
         {
          return 1;
         }
         if (D2[ELM1] == ans[4])
         {
          return 1;
         }
         if (D3[ELM1] == ans[4])
         {
          return 1;
         }
         if (D4[ELM1] == ans[4])
         {
          return 1;
         }
         if (D5[ELM1] == ans[4])
         {
          return 1;
         }
         if (D6[ELM1] == ans[4])
         {
          return 1;
         }
         if (D1[ELM1] == ans[5])
         {
          return 1;
         }
         if (D2[ELM1] == ans[5])
         {
          return 1;
         }
         if (D3[ELM1] == ans[5])
         {
          return 1;
         }
         if (D4[ELM1] == ans[5])
         {
          return 1;
         }
         if (D5[ELM1] == ans[5])
         {
          return 1;
         }
         if (D6[ELM1] == ans[5])
         {
          return 1;
         }
threefivesix:
         flag = 0;
         if (D1[ELM1] == ans[2])
         {
          flag++;
         }
         if (D2[ELM1] == ans[2])
         {
          flag++;
         }
         if (D3[ELM1] == ans[2])
         {
          flag++;
         }
         if (D4[ELM1] == ans[2])
         {
          flag++;
         }
         if (D5[ELM1] == ans[2])
         {
          flag++;
         }
         if (D6[ELM1] == ans[2])
         {
          flag++;
         }
         if (flag != 1)
          goto fourfivesix;
         if (D1[ELM1] == ans[4])
         {
          flag++;
         }
         if (D2[ELM1] == ans[4])
         {
          flag++;
         }
         if (D3[ELM1] == ans[4])
         {
          flag++;
         }
         if (D4[ELM1] == ans[4])
         {
          flag++;
         }
         if (D5[ELM1] == ans[4])
         {
          flag++;
         }
         if (D6[ELM1] == ans[4])
         {
          flag++;
         }
         if (flag != 2)
          goto fourfivesix;
         if (D1[ELM1] == ans[5])
         {
          return 1;
         }
         if (D2[ELM1] == ans[5])
         {
          return 1;
         }
         if (D3[ELM1] == ans[5])
         {
          return 1;
         }
         if (D4[ELM1] == ans[5])
         {
          return 1;
         }
         if (D5[ELM1] == ans[5])
         {
          return 1;
         }
         if (D6[ELM1] == ans[5])
         {
          return 1;
         }
fourfivesix:
         flag = 0;
         if (D1[ELM1] == ans[3])
         {
          flag++;
         }
         if (D2[ELM1] == ans[3])
         {
          flag++;
         }
         if (D3[ELM1] == ans[3])
         {
          flag++;
         }
         if (D4[ELM1] == ans[3])
         {
          flag++;
         }
         if (D5[ELM1] == ans[3])
         {
          flag++;
         }
         if (D6[ELM1] == ans[3])
         {
          flag++;
         }
         if (flag != 1)
          goto increasecounter;
         if (D1[ELM1] == ans[4])
         {
          flag++;
         }
         if (D2[ELM1] == ans[4])
         {
          flag++;
         }
         if (D3[ELM1] == ans[4])
         {
          flag++;
         }
         if (D4[ELM1] == ans[4])
         {
          flag++;
         }
         if (D5[ELM1] == ans[4])
         {
          flag++;
         }
         if (D6[ELM1] == ans[4])
         {
          flag++;
         }
         if (flag != 2)
          goto increasecounter;
         if (D1[ELM1] == ans[5])
         {
          return 1;
         }
         if (D2[ELM1] == ans[5])
         {
          return 1;
         }
         if (D3[ELM1] == ans[5])
         {
          return 1;
         }
         if (D4[ELM1] == ans[5])
         {
          return 1;
         }
         if (D5[ELM1] == ans[5])
         {
          return 1;
         }
         if (D6[ELM1] == ans[5])
         {
          return 1;
         }



increasecounter:

         ELM1++;
     } 
          //If it is Three min

          return 0;


}

推荐答案

通过将if语句转换为布尔表达式来删除它们.

remove if statements by converting them to Boolean expressions.

flag += (DN[ELM1] == ans[0])

确保数组位于寄存器或共享内存中,而不是全局

make sure your arrays are in registers or shared memory rather than global

此外,在这样简单的算法上,您会得到过于复杂的逻辑.将D数组的布局更改为D [N] [6],因为它将简化许多事情

also, on such simple algorithm you got way too complicated logic. change layout of D arrays to be D[N][6] as it will simplify many things

顺便说一句,您可能希望对您的帖子进行一些裁剪,以至于无法阅读

by the way, you may want to crop your post a little bit, way too much to read

3 x3示例

     A
  |0 0 0|           |x x 0
D |0 0 0| -> ... -> |x x 0 -> reduce down -> |x x o| -> reduce across -> 2x
  |0 0 0|           |x x x

基本上,如果匹配A在数组D中,则将矩阵像元设置为true. 在每次迭代中,如果整列为true,则将column减少为true. 那么你算出的是真数字.

basically you are setting matrix cells to true if match A is in array D. on each iteration you reduce columns to true if entire column is true. then you count number of true.

这篇关于如何在CUDA下改善此功能?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆