memcpy的多线程编程 [英] Multithread Programming for memcpy

查看:890
本文介绍了memcpy的多线程编程的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我正在做memcpy函数的优化任务,我在这里找到了这个链接。 如何提高memcpy的性能



由于我不熟悉多线程编程,我不知道如何把下面的代码整合到原来的 main 函数中。如何把原问题中的代码改写成一个多线程 memcpy 项目?也就是说,如何为这个多线程 memcpy 创建一个完整的项目?startCopyThreads、stopCopyThreads 和 mt_memcpy 这些函数应该在原 main 函数的什么位置调用?

  #define NUM_CPY_THREADS 4 /* number of worker threads each copy is split across */

/* Per-worker handles; index i belongs to worker i. */
HANDLE hCopyThreads[NUM_CPY_THREADS] = {0};         /* worker thread handles */
HANDLE hCopyStartSemaphores[NUM_CPY_THREADS] = {0}; /* each worker waits on its own entry */
HANDLE hCopyStopSemaphores[NUM_CPY_THREADS] = {0};  /* each worker releases its own entry after copying */

/* Description of the slice one worker has to copy. */
typedef struct
{
    int ct;      /* worker index; selects the worker's start/stop semaphores */
    void *src;   /* first source byte of the slice */
    void *dest;  /* first destination byte of the slice */
    size_t size; /* slice length in bytes */
} mt_cpy_t;      /* the terminating ';' was missing, which broke the next definition */

/* One parameter record per worker; a pointer to entry i is handed to worker i. */
mt_cpy_t mtParamters[NUM_CPY_THREADS] = {0};

/*
 * Worker thread entry point.
 *
 * param points at this worker's mt_cpy_t record. The worker repeatedly waits
 * on its start semaphore, copies the slice described by the record with
 * memcpy, and then releases its stop semaphore to report completion.
 *
 * Returns 0 when the wait on the start semaphore stops succeeding.
 */
DWORD WINAPI thread_copy_proc(LPVOID param)
{
    mt_cpy_t *p = (mt_cpy_t *)param;

    /*
     * Only copy when the wait actually acquired the semaphore. If the wait
     * fails (for example because the handle is no longer valid), leave the
     * loop instead of spinning and copying with stale parameters.
     */
    while (WaitForSingleObject(hCopyStartSemaphores[p->ct], INFINITE) == WAIT_OBJECT_0)
    {
        memcpy(p->dest, p->src, p->size);
        ReleaseSemaphore(hCopyStopSemaphores[p->ct], 1, NULL);
    }

    return 0;
}

/*
 * Creates the start/stop semaphores and the worker thread for every slot.
 *
 * Both semaphores of a slot start at count 0 with a maximum of 1, so a worker
 * blocks until its start semaphore is released.
 *
 * Returns 0 on success. If any semaphore or thread cannot be created,
 * everything created so far is torn down again and -1 is returned.
 */
int startCopyThreads(void)
{
    for (int ctr = 0; ctr < NUM_CPY_THREADS; ctr++)
    {
        hCopyThreads[ctr] = NULL;
        hCopyStartSemaphores[ctr] = CreateSemaphore(NULL, 0, 1, NULL);
        hCopyStopSemaphores[ctr] = CreateSemaphore(NULL, 0, 1, NULL);
        mtParamters[ctr].ct = ctr;

        /* Only start the worker once both of its semaphores exist. */
        if (hCopyStartSemaphores[ctr] != NULL && hCopyStopSemaphores[ctr] != NULL)
        {
            hCopyThreads[ctr] = CreateThread(NULL, 0, thread_copy_proc, &mtParamters[ctr], 0, NULL);
        }

        if (hCopyThreads[ctr] == NULL)
        {
            /* Roll back this slot and every earlier one so nothing leaks. */
            for (int undo = ctr; undo >= 0; undo--)
            {
                if (hCopyThreads[undo] != NULL)
                {
                    TerminateThread(hCopyThreads[undo], 0);
                    CloseHandle(hCopyThreads[undo]);
                    hCopyThreads[undo] = NULL;
                }
                if (hCopyStartSemaphores[undo] != NULL)
                {
                    CloseHandle(hCopyStartSemaphores[undo]);
                    hCopyStartSemaphores[undo] = NULL;
                }
                if (hCopyStopSemaphores[undo] != NULL)
                {
                    CloseHandle(hCopyStopSemaphores[undo]);
                    hCopyStopSemaphores[undo] = NULL;
                }
            }
            return -1;
        }
    }

    return 0;
}

/*
 * Copies bytes bytes from src to dest by splitting the range into
 * NUM_CPY_THREADS contiguous slices, one per worker thread.
 *
 * The workers and semaphores must already exist (see startCopyThreads).
 * The shared mtParamters records are overwritten on every call.
 *
 * Returns dest, or NULL if waiting for the workers failed.
 */
void *mt_memcpy(void *dest, void *src, size_t bytes)
{
    /*
     * Slice k starts at floor(k * bytes / N). Writing bytes as q * N + r gives
     * k * q + floor(k * r / N), which yields the same boundaries without the
     * k * bytes product that can wrap around size_t for large copies.
     */
    const size_t quot = bytes / NUM_CPY_THREADS;
    const size_t rem = bytes % NUM_CPY_THREADS;

    /* set up parameters */
    for (int ctr = 0; ctr < NUM_CPY_THREADS; ctr++)
    {
        size_t begin = (size_t)ctr * quot + (size_t)ctr * rem / NUM_CPY_THREADS;
        size_t end = (size_t)(ctr + 1) * quot + (size_t)(ctr + 1) * rem / NUM_CPY_THREADS;

        mtParamters[ctr].dest = (char *)dest + begin;
        mtParamters[ctr].src = (char *)src + begin;
        mtParamters[ctr].size = end - begin;
    }

    /* release semaphores to start computation */
    for (int ctr = 0; ctr < NUM_CPY_THREADS; ctr++)
    {
        ReleaseSemaphore(hCopyStartSemaphores[ctr], 1, NULL);
    }

    /* wait for all threads to finish */
    if (WaitForMultipleObjects(NUM_CPY_THREADS, hCopyStopSemaphores, TRUE, INFINITE) == WAIT_FAILED)
    {
        return NULL;
    }

    return dest;
}

/*
 * Shuts the worker pool down: terminates every worker thread and closes the
 * thread handle and both semaphores of each slot.
 *
 * The handle slots are reset to NULL afterwards so stale handles are not
 * reused. Always returns 0.
 */
int stopCopyThreads(void)
{
    for (int ctr = 0; ctr < NUM_CPY_THREADS; ctr++)
    {
        TerminateThread(hCopyThreads[ctr], 0);
        /* Terminating a thread does not close its handle; close it to avoid a leak. */
        CloseHandle(hCopyThreads[ctr]);
        hCopyThreads[ctr] = NULL;

        CloseHandle(hCopyStartSemaphores[ctr]);
        hCopyStartSemaphores[ctr] = NULL;

        CloseHandle(hCopyStopSemaphores[ctr]);
        hCopyStopSemaphores[ctr] = NULL;
    }
    return 0;
}


解决方案

这在很大程度上取决于架构和操作系统。



使用一个处理器:



如果您在只有一个核心的机器上用多个线程执行 memcpy,那么不会有加速。原因是,所有线程都在同一个处理器上运行,会发生上下文切换;与不使用线程直接调用 memcpy 相比,这只会带来额外开销。



使用多核:

在这种情况下,结果还取决于内核是否把你的线程映射到不同的处理器上,因为这些线程是用户级线程。如果你的线程在不同的处理器上并发运行,并且内存支持双端口访问,你可能会看到加速。如果内存只有单端口访问,我不确定是否会有改进。


I am doing an optimization task for memcpy function, I found this link here. How to increase performance of memcpy

Since I'm not familiar with multithreaded programming, I don't know how to integrate the code below into the original main function. How do I turn the code from the original question into a multithreaded memcpy project? I mean, how do I create a complete project for this multithreaded memcpy? Where in the original main function should functions like startCopyThreads, stopCopyThreads and mt_memcpy be called?

/* Number of worker threads each copy is split across. */
#define NUM_CPY_THREADS 4

/* Per-worker handles; index i belongs to worker i. */
HANDLE hCopyThreads[NUM_CPY_THREADS] = {0};         /* worker thread handles */
HANDLE hCopyStartSemaphores[NUM_CPY_THREADS] = {0}; /* each worker waits on its own entry */
HANDLE hCopyStopSemaphores[NUM_CPY_THREADS] = {0};  /* each worker releases its own entry after copying */

/* Description of the slice one worker has to copy. */
typedef struct
{
    int ct;      /* worker index; selects the worker's start/stop semaphores */
    void *src;   /* first source byte of the slice */
    void *dest;  /* first destination byte of the slice */
    size_t size; /* slice length in bytes */
} mt_cpy_t;

/* One parameter record per worker; a pointer to entry i is handed to worker i. */
mt_cpy_t mtParamters[NUM_CPY_THREADS] = {0};

/*
 * Worker thread entry point.
 *
 * param points at this worker's mt_cpy_t record. The worker repeatedly waits
 * on its start semaphore, copies the slice described by the record with
 * memcpy, and then releases its stop semaphore to report completion.
 *
 * Returns 0 when the wait on the start semaphore stops succeeding.
 */
DWORD WINAPI thread_copy_proc(LPVOID param)
{
    mt_cpy_t *p = (mt_cpy_t *)param;

    /*
     * Only copy when the wait actually acquired the semaphore. If the wait
     * fails (for example because the handle is no longer valid), leave the
     * loop instead of spinning and copying with stale parameters.
     */
    while (WaitForSingleObject(hCopyStartSemaphores[p->ct], INFINITE) == WAIT_OBJECT_0)
    {
        memcpy(p->dest, p->src, p->size);
        ReleaseSemaphore(hCopyStopSemaphores[p->ct], 1, NULL);
    }

    return 0;
}

/*
 * Creates the start/stop semaphores and the worker thread for every slot.
 *
 * Both semaphores of a slot start at count 0 with a maximum of 1, so a worker
 * blocks until its start semaphore is released.
 *
 * Returns 0 on success. If any semaphore or thread cannot be created,
 * everything created so far is torn down again and -1 is returned.
 */
int startCopyThreads(void)
{
    for (int ctr = 0; ctr < NUM_CPY_THREADS; ctr++)
    {
        hCopyThreads[ctr] = NULL;
        hCopyStartSemaphores[ctr] = CreateSemaphore(NULL, 0, 1, NULL);
        hCopyStopSemaphores[ctr] = CreateSemaphore(NULL, 0, 1, NULL);
        mtParamters[ctr].ct = ctr;

        /* Only start the worker once both of its semaphores exist. */
        if (hCopyStartSemaphores[ctr] != NULL && hCopyStopSemaphores[ctr] != NULL)
        {
            hCopyThreads[ctr] = CreateThread(NULL, 0, thread_copy_proc, &mtParamters[ctr], 0, NULL);
        }

        if (hCopyThreads[ctr] == NULL)
        {
            /* Roll back this slot and every earlier one so nothing leaks. */
            for (int undo = ctr; undo >= 0; undo--)
            {
                if (hCopyThreads[undo] != NULL)
                {
                    TerminateThread(hCopyThreads[undo], 0);
                    CloseHandle(hCopyThreads[undo]);
                    hCopyThreads[undo] = NULL;
                }
                if (hCopyStartSemaphores[undo] != NULL)
                {
                    CloseHandle(hCopyStartSemaphores[undo]);
                    hCopyStartSemaphores[undo] = NULL;
                }
                if (hCopyStopSemaphores[undo] != NULL)
                {
                    CloseHandle(hCopyStopSemaphores[undo]);
                    hCopyStopSemaphores[undo] = NULL;
                }
            }
            return -1;
        }
    }

    return 0;
}

/*
 * Copies bytes bytes from src to dest by splitting the range into
 * NUM_CPY_THREADS contiguous slices, one per worker thread.
 *
 * The workers and semaphores must already exist (see startCopyThreads).
 * The shared mtParamters records are overwritten on every call.
 *
 * Returns dest, or NULL if waiting for the workers failed.
 */
void *mt_memcpy(void *dest, void *src, size_t bytes)
{
    /*
     * Slice k starts at floor(k * bytes / N). Writing bytes as q * N + r gives
     * k * q + floor(k * r / N), which yields the same boundaries without the
     * k * bytes product that can wrap around size_t for large copies.
     */
    const size_t quot = bytes / NUM_CPY_THREADS;
    const size_t rem = bytes % NUM_CPY_THREADS;

    /* set up parameters */
    for (int ctr = 0; ctr < NUM_CPY_THREADS; ctr++)
    {
        size_t begin = (size_t)ctr * quot + (size_t)ctr * rem / NUM_CPY_THREADS;
        size_t end = (size_t)(ctr + 1) * quot + (size_t)(ctr + 1) * rem / NUM_CPY_THREADS;

        mtParamters[ctr].dest = (char *)dest + begin;
        mtParamters[ctr].src = (char *)src + begin;
        mtParamters[ctr].size = end - begin;
    }

    /* release semaphores to start computation */
    for (int ctr = 0; ctr < NUM_CPY_THREADS; ctr++)
    {
        ReleaseSemaphore(hCopyStartSemaphores[ctr], 1, NULL);
    }

    /* wait for all threads to finish */
    if (WaitForMultipleObjects(NUM_CPY_THREADS, hCopyStopSemaphores, TRUE, INFINITE) == WAIT_FAILED)
    {
        return NULL;
    }

    return dest;
}

/*
 * Shuts the worker pool down: terminates every worker thread and closes the
 * thread handle and both semaphores of each slot.
 *
 * The handle slots are reset to NULL afterwards so stale handles are not
 * reused. Always returns 0.
 */
int stopCopyThreads(void)
{
    for (int ctr = 0; ctr < NUM_CPY_THREADS; ctr++)
    {
        TerminateThread(hCopyThreads[ctr], 0);
        /* Terminating a thread does not close its handle; close it to avoid a leak. */
        CloseHandle(hCopyThreads[ctr]);
        hCopyThreads[ctr] = NULL;

        CloseHandle(hCopyStartSemaphores[ctr]);
        hCopyStartSemaphores[ctr] = NULL;

        CloseHandle(hCopyStopSemaphores[ctr]);
        hCopyStopSemaphores[ctr] = NULL;
    }
    return 0;
}

解决方案

It depends a lot on the architecture and OS.

With one processor:

If you are using threads for memcpy on a machine with only one core, you will not see any speedup. The reason is that all threads run on the same processor, so there will be context switching, which is pure overhead compared to calling memcpy without threads.

With multicore:

In this case, it also depends on whether the kernel maps your threads onto different processors, since these threads are user-level threads. If your threads run concurrently on different processors, you may see a speedup if the memory has dual-port access. With single-port access I am not sure whether there will be any improvement.

这篇关于memcpy的多线程编程的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆