性能：boost.compute 与 OpenCL C++ 包装器的对比 [英] Performance: boost.compute vs. OpenCL C++ wrapper

查看：1095 发布时间：2016/10/24 13:04:35 c++ boost opencl boost-compute

本文介绍了 boost.compute 与 OpenCL C++ 包装器性能对比问题的处理方法，对大家解决问题具有一定的参考价值，需要的朋友们可以跟随小编一起学习！

问题描述

以下代码分别使用 boost.compute 和 OpenCL C++ 包装器对两个向量求和。结果表明 boost.compute 比 OpenCL C++ 包装器慢了将近 20 倍。我想知道是我误用了 boost.compute，还是它确实很慢。
平台：win7，vs2013，boost 1.55，boost.compute 0.2，ATI Radeon HD 4600

The following code adds two vectors using boost.compute and the OpenCL C++ wrapper, respectively. The results show that boost.compute is almost 20 times slower than the OpenCL C++ wrapper. I wonder whether I am misusing boost.compute or whether it is indeed slow. Platform: win7, vs2013, boost 1.55, boost.compute 0.2, ATI Radeon HD 4600

使用 OpenCL C++ 包装器的代码：

Code using the OpenCL C++ wrapper:

#include <algorithm>
#include <cstdlib>
#include <fstream>
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

#define __CL_ENABLE_EXCEPTIONS
#include <CL/cl.hpp>
#include <boost/smart_ptr/scoped_array.hpp>
#include <boost/timer/timer.hpp>

int main(){
    static char kernelSourceCode[] = "\
__kernel void vadd(__global int * a, __global int * b, __global int * c){\
    size_t i = get_global_id(0);\
    \
    c[i] = a[i] + b[i];\
    }\
";

    using type = boost::scoped_array<int>;
    size_t const BUFFER_SIZE = 1UL << 13;
    type A(new int[BUFFER_SIZE]);
    type B(new int[BUFFER_SIZE]);
    type C(new int[BUFFER_SIZE]);

    std::iota(A.get(), A.get() + BUFFER_SIZE, 0);
    std::transform(A.get(), A.get() + BUFFER_SIZE, B.get(), std::bind(std::multiplies<int>(), std::placeholders::_1, 2));

    try {
        std::vector<cl::Platform> platformList;
        // Pick platform
        cl::Platform::get(&platformList);
        // Pick first platform
        cl_context_properties cprops[] = {
            CL_CONTEXT_PLATFORM,
            (cl_context_properties)(platformList[0])(),
            0
        };
        cl::Context context(CL_DEVICE_TYPE_GPU, cprops);
        // Query the set of devices attached to the context
        std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
        // Create command-queue
        cl::CommandQueue queue(context, devices[0], 0);
        // Create the program from source
        cl::Program::Sources sources(
            1,
            std::make_pair(kernelSourceCode, 0)
            );
        cl::Program program(context, sources);
        // Build program
        program.build(devices);
        // Create buffer for A and copy host contents
        cl::Buffer aBuffer = cl::Buffer(
            context,
            CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
            BUFFER_SIZE * sizeof(int),
            (void *)&A[0]);
        // Create buffer for B and copy host contents
        cl::Buffer bBuffer = cl::Buffer(
            context,
            CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
            BUFFER_SIZE * sizeof(int),
            (void *)&B[0]);
        // Create buffer that uses the host ptr C
        cl::Buffer cBuffer = cl::Buffer(
            context,
            CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
            BUFFER_SIZE * sizeof(int),
            (void *)&C[0]);
        // Create kernel object
        cl::Kernel kernel(program, "vadd");
        // Set kernel args
        kernel.setArg(0, aBuffer);
        kernel.setArg(1, bBuffer);
        kernel.setArg(2, cBuffer);
        // Do the work
        void *output;
        {
            boost::timer::auto_cpu_timer timer;
            queue.enqueueNDRangeKernel(
                kernel,
                cl::NullRange,
                cl::NDRange(BUFFER_SIZE),
                cl::NullRange
                );
            output = (int *)queue.enqueueMapBuffer(
                cBuffer,
                CL_TRUE, // block
                CL_MAP_READ,
                0,
                BUFFER_SIZE * sizeof(int)
                );
        }
        std::ofstream gpu("gpu.txt");
        for (int i = 0; i < BUFFER_SIZE; i++) {
            gpu << C[i] << " ";
        }
        queue.enqueueUnmapMemObject(
            cBuffer,
            output);
    }
    catch (cl::Error const &err) {
        std::cerr << err.what() << "\n";
    }

    return EXIT_SUCCESS;
}

使用 boost.compute 的代码：

Code using boost.compute:

#include <boost/compute/container/mapped_view.hpp>
 #include <boost/compute/algorithm/transform.hpp>
 #include <boost/compute/functional/operator.hpp>
 #include <numeric>
 #include <algorithm>
 #include <functional>
 #include <boost/timer/timer.hpp>
 #include <boost/smart_ptr/scoped_array.hpp>
 #include <fstream>
 #include <boost/tuple/tuple_comparison.hpp>

 /// Adds two integer vectors with boost::compute::transform on the default
 /// device, times the transform plus the map of the result view, and writes
 /// the result to "gpu.txt".
 ///
 /// @return EXIT_SUCCESS when the computation ran; EXIT_FAILURE when the
 ///         default device is not a GPU or boost.compute raised opencl_error.
 int main(){
     size_t const BUFFER_SIZE = 1UL << 13;
     boost::scoped_array<int> A(new int[BUFFER_SIZE]), B(new int[BUFFER_SIZE]), C(new int[BUFFER_SIZE]);

     // A = 0, 1, 2, ... and B = 2 * A, so the expected result is C[i] = 3 * i.
     std::iota(A.get(), A.get() + BUFFER_SIZE, 0);
     std::transform(A.get(), A.get() + BUFFER_SIZE, B.get(),
                    [](int value) { return value * 2; });

     try{
         // The device type is a bitfield, so test the GPU bit rather than
         // comparing for exact equality.
         if ((boost::compute::system::default_device().type() & CL_DEVICE_TYPE_GPU) == 0){
             std::cerr << "Not GPU\n";
             return EXIT_FAILURE;
         }

         boost::compute::command_queue queue = boost::compute::system::default_queue();

         // Views over the host arrays: A and B are read-only inputs, C is the output.
         boost::compute::mapped_view<int> mA(static_cast<const int*>(A.get()), BUFFER_SIZE),
             mB(static_cast<const int*>(B.get()), BUFFER_SIZE);
         boost::compute::mapped_view<int> mC(C.get(), BUFFER_SIZE);

         // Timed region: the transform call plus mapping the result for reading.
         // NOTE(review): the first transform() call presumably also generates and
         // compiles its OpenCL kernel at run time, so this measurement likely
         // includes compilation, whereas the hand-written OpenCL version builds
         // its program before the timer starts -- confirm against the
         // Boost.Compute documentation before comparing the two timings.
         {
             boost::timer::auto_cpu_timer timer;
             boost::compute::transform(
                 mA.cbegin(), mA.cend(),
                 mB.cbegin(),
                 mC.begin(),
                 boost::compute::plus<int>(),
                 queue
                 );
             mC.map(CL_MAP_READ, queue);
         }

         // Dump the result while the view is mapped, then release the mapping.
         std::ofstream gpu("gpu.txt");
         for (size_t i = 0; i != BUFFER_SIZE; ++i) {
             gpu << C[i] << " ";
         }
         mC.unmap(queue);
     }
     catch (boost::compute::opencl_error const &err){
         std::cerr << err.what() << "\n";
         return EXIT_FAILURE;
     }

     return EXIT_SUCCESS;
 }

性能：boost.compute v.s. opencl c ++ wrapper [英] Performance: boost.compute v.s. opencl c++ wrapper

问题描述

推荐答案

相关文章

C/C++开发最新文章

热门教程

热门工具

登录关闭

性能：boost.compute v.s. opencl c ++ wrapper [英] Performance: boost.compute v.s. opencl c++ wrapper

问题描述

推荐答案

相关文章

C/C++开发最新文章

热门教程

热门工具

登录 关闭

登录关闭