使用OpenCL的欧式距离 [英] Euclidean distance using OpenCL

查看:91
本文介绍了使用OpenCL的欧式距离的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我正在尝试计算一组5D点(像素)到5D单点(中心)的欧式距离并将其存储在另一个结果向量中,我想使用向量索引将所有信息存储在一个向量中,因此对于第i个像素,这5个维度是(5i),(5i + 1),... 我是OpenCL的新手,我只是出于自己的意图在Internet上编辑了示例代码.理论是正确的,但是代码没有给出正确的答案! 这是内核:

I am trying to compute the Euclidean distance from a set of 5D points (pixels) to a single 5D point (the center) and store the results in another vector. I want to use flat indexing so that all the data lives in a single vector: for the i-th pixel, the 5 dimensions are at indices (5i), (5i+1), ..., (5i+4). I am new to OpenCL and adapted a sample from the internet for my own purposes. The theory is right, but the code doesn't produce the right answers! Here is the kernel:

//d_kernel.cl

// Double precision is an optional extension (cl_khr_fp64) on OpenCL 1.0/1.1
// implementations; enabling it explicitly keeps the kernel building there.
#pragma OPENCL EXTENSION cl_khr_fp64 : enable

// Computes the SQUARED Euclidean distance between one 5-D pixel and the first
// 5-D cluster centre. No square root is taken; apply sqrt() to the result if
// the true distance is needed.
//
// pixelInfo                  flat array; pixel i occupies elements [5*i, 5*i + 4]
// clusterCentres             flat array; only elements [0, 4] are read
// distanceFromClusterCentre  output; element i receives the result for pixel i
//
// Each work-item handles the pixel selected by get_global_id(0). There is no
// bounds check, so the global work size must not exceed the number of pixels.
__kernel void distance_kernel(__global double *pixelInfo,
                                __global double *clusterCentres,
                                __global double *distanceFromClusterCentre)
{
    const size_t index = get_global_id(0);

    // First of the five consecutive components belonging to this pixel.
    __global const double *pixel = pixelInfo + 5 * index;

    // Accumulate in double: integer temporaries would truncate each
    // difference before it is squared.
    double sum = 0.0;
    for (int k = 0; k < 5; ++k) {
        const double diff = pixel[k] - clusterCentres[k];
        sum += diff * diff;
    }

    distanceFromClusterCentre[index] = sum;
}

这是主机代码:

#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <vector>

#include <CL/cl.h>
using namespace std;

// Upper bound, in bytes, on how much of the kernel source file main() reads
// (0x100000 = 1 MiB).
#define MAX_SOURCE_SIZE (0x100000)
int main(int argc, char **argv)
{

    // Create the two input vectors
    int i;
    const int pixelsNumber = 1024;
    const int clustersNumber = 1;

    std::vector<double> pixelInfo;
    pixelInfo.resize(5 * pixelsNumber);
    std::fill(pixelInfo.begin(), pixelInfo.end(), 500);

    std::vector<double> clusterCentres;
    clusterCentres.resize(5 * clustersNumber);
    std::fill(clusterCentres.begin(), clusterCentres.end(), 200);

    std::vector<double> distanceFromClusterCentre;
    distanceFromClusterCentre.resize(pixelsNumber);
    std::fill(distanceFromClusterCentre.begin(), distanceFromClusterCentre.end(), 0);

    // Load the kernel source code into the array source_str
    FILE *fp;
    char *source_str;
    size_t source_size;

    fp = fopen("d_kernel.cl", "r");
    if (!fp) {
        fprintf(stderr, "Failed to load kernel.\n");
        exit(1);
    }
    source_str = (char*)malloc(MAX_SOURCE_SIZE);
    source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp);
    fclose(fp);

    // Get platform and device information
    cl_platform_id platform_id = NULL;
    cl_device_id device_id = NULL;
    cl_uint ret_num_devices;
    cl_uint ret_num_platforms;
    cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
    ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1,
        &device_id, &ret_num_devices);

    // Create an OpenCL context
    cl_context context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);

    // Create a command queue
    cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret);

    // Create memory buffers on the device for each vector 
    cl_mem pixelInfo_mem = clCreateBuffer(context, CL_MEM_READ_ONLY,
        5 * pixelsNumber * sizeof(int), NULL, &ret);
    cl_mem clusterCentres_mem = clCreateBuffer(context, CL_MEM_READ_ONLY,
        5 * clustersNumber * sizeof(int), NULL, &ret);
    cl_mem distanceFromClusterCentre_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
        pixelsNumber * sizeof(int), NULL, &ret);

    // Copy the vectors to their respective memory buffers
    ret = clEnqueueWriteBuffer(command_queue, pixelInfo_mem, CL_TRUE, 0,
        5 * pixelsNumber * sizeof(int), pixelInfo.data(), 0, NULL, NULL);
    ret = clEnqueueWriteBuffer(command_queue, clusterCentres_mem, CL_TRUE, 0,
        5 * clustersNumber * sizeof(int), clusterCentres.data(), 0, NULL, NULL);

    // Create a program from the kernel source
    cl_program program = clCreateProgramWithSource(context, 1,
        (const char **)&source_str, (const size_t *)&source_size, &ret);

    // Build the program
    ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);

    // Create the OpenCL kernel
    cl_kernel kernel = clCreateKernel(program, "vector_add", &ret);

    // Set the arguments of the kernel
    ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&pixelInfo_mem);
    ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&clusterCentres_mem);
    ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&distanceFromClusterCentre_mem);

    // Execute the OpenCL kernel on the list
    size_t global_item_size = pixelsNumber; // Process the entire lists
    size_t local_item_size = 64; // Divide work items into groups of 64
    ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL,
        &global_item_size, &local_item_size, 0, NULL, NULL);

    // Read the memory buffer result on the device to the local vector result
    ret = clEnqueueReadBuffer(command_queue, distanceFromClusterCentre_mem, CL_TRUE, 0,
        pixelsNumber * sizeof(int), distanceFromClusterCentre.data(), 0, NULL, NULL);

    // Display the result to the screen
    for (i = 0; i < pixelsNumber; i++)
    {
        cout << "Pixel " << i << ": " << distanceFromClusterCentre[i] << endl;
        //system("PAUSE");
    }

    // Clean up
    ret = clFlush(command_queue);
    ret = clFinish(command_queue);
    ret = clReleaseKernel(kernel);
    ret = clReleaseProgram(program);
    ret = clReleaseMemObject(pixelInfo_mem);
    ret = clReleaseMemObject(clusterCentres_mem);
    ret = clReleaseMemObject(distanceFromClusterCentre_mem);
    ret = clReleaseCommandQueue(command_queue);
    ret = clReleaseContext(context);
    free(pixelInfo.data());
    free(clusterCentres.data());
    free(distanceFromClusterCentre.data());

    system("PAUSE");
    return 0;
}

结果的一部分是:

.
.
.
Pixel 501: -1.11874e+306
Pixel 502: -1.16263e+306
Pixel 503: -1.07485e+306
Pixel 504: -1.03079e+306
Pixel 505: -9.42843e+305
Pixel 506: -9.86903e+305
Pixel 507: -8.98954e+305
Pixel 508: -9.86903e+305
Pixel 509: -8.98954e+305
Pixel 510: -9.43014e+305
Press any key to continue . . .
Pixel 511: -8.55065e+305
Pixel 512: 0
Pixel 513: 0
Pixel 514: 0
Pixel 515: 0
Pixel 516: 0
Pixel 517: 0
Pixel 518: 0
Pixel 519: 0
Pixel 520: 0
.
.
.

在索引511之后,向量的其余部分为零!

After index 511, the rest of the vector is zero!

推荐答案

您创建的是 double 类型的向量,却把它们当作 int 来处理(按 int 的大小创建缓冲区、向缓冲区写入数据并从中读回结果)。为避免此类错误,您可以通过以下方式编写代码:

You created vectors of doubles, but then you treat them as if they were ints (you create the buffers sized for ints, write the data using int sizes, and read the results back using int sizes). To avoid such mistakes, you could write your code this way:

cl_mem pixelInfo_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, pixelInfo.size() * sizeof(pixelInfo[0]), NULL, &ret);
                                                                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

这篇关于使用OpenCL的欧式距离的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆