memcpy的多线程编程 [英] Multithread Programming for memcpy
问题描述
我正在做memcpy函数的优化任务,我在这里找到了这个链接。 如何提高memcpy的性能
由于我不熟悉多线程编程,我不知道如何将下面的代码插入到原来的main函数?如何将原始问题中的代码修改为多线程memcpy项目?我的意思是,如何为这个多线程memcpy项目创建一个完整的项目。在原始main函数中插入函数如startCopyThreads或stopCopyThreads或mt_memcpy函数的位置在哪里?
/* Number of worker threads that each copy is split across. */
#define NUM_CPY_THREADS 4

/* Handle of each worker thread. */
HANDLE hCopyThreads[NUM_CPY_THREADS] = { NULL };
/* Per-thread semaphore that is released to start a copy. */
HANDLE hCopyStartSemaphores[NUM_CPY_THREADS] = { NULL };
/* Per-thread semaphore that the worker releases once its copy is done. */
HANDLE hCopyStopSemaphores[NUM_CPY_THREADS] = { NULL };
typedef struct
{
int ct;
void * src,* dest;
size_t size;
} mt_cpy_t
mt_cpy_t mtParamters [NUM_CPY_THREADS] = {0};
/*
 * Worker thread entry point.
 *
 * param points at this thread's mt_cpy_t slot. The thread waits on its
 * start semaphore, copies the slice described by the slot, releases its
 * stop semaphore, and then waits again.
 *
 * Returns 0 once a wait on the start semaphore does not succeed.
 */
DWORD WINAPI thread_copy_proc(LPVOID param)
{
    mt_cpy_t *p = (mt_cpy_t *) param;

    /*
     * Copy only when the wait was actually satisfied. With the result
     * ignored, a failing wait (e.g. an invalid semaphore handle) made the
     * loop spin forever, re-running memcpy on stale parameters.
     */
    while (WaitForSingleObject(hCopyStartSemaphores[p->ct], INFINITE) == WAIT_OBJECT_0)
    {
        memcpy(p->dest, p->src, p->size);
        ReleaseSemaphore(hCopyStopSemaphores[p->ct], 1, NULL);
    }
    return 0;
}
/*
 * Creates the start/stop semaphores and the worker thread for every slot.
 *
 * Returns 0 when all NUM_CPY_THREADS workers were created, or -1 as soon as
 * a semaphore or thread cannot be created. On failure, the handles created
 * so far are left in the global arrays.
 */
int startCopyThreads()
{
    for (int ctr = 0; ctr < NUM_CPY_THREADS; ctr++)
    {
        /* Initial count 0: the worker blocks until a copy is requested. */
        hCopyStartSemaphores[ctr] = CreateSemaphore(NULL, 0, 1, NULL);
        hCopyStopSemaphores[ctr] = CreateSemaphore(NULL, 0, 1, NULL);
        if (hCopyStartSemaphores[ctr] == NULL || hCopyStopSemaphores[ctr] == NULL)
        {
            /* Do not start a worker that would wait on a missing semaphore. */
            return -1;
        }

        mtParamters[ctr].ct = ctr;
        hCopyThreads[ctr] = CreateThread(0, 0, thread_copy_proc, &mtParamters[ctr], 0, NULL);
        if (hCopyThreads[ctr] == NULL)
        {
            return -1;
        }
    }
    return 0;
}
void * mt_memcpy(void * dest,void * src,size_t bytes)
{
//设置参数
for = 0; ctr< NUM_CPY_THREADS; ctr ++)
{
mtParamters [ctr] .dest =(char *)dest + ctr * bytes / NUM_CPY_THREADS;
mtParamters [ctr] .src =(char *)src + ctr * bytes / NUM_CPY_THREADS;
mtParamters [ctr] .size =(ctr + 1)* bytes / NUM_CPY_THREADS - ctr * bytes / NUM_CPY_THREADS;
}
//释放信号量以启动计算
for(int ctr = 0; ctr ReleaseSemaphore(hCopyStartSemaphores [ctr],1 , 空值);
//等待所有线程完成
WaitForMultipleObjects(NUM_CPY_THREADS,hCopyStopSemaphores,TRUE,INFINITE);
return dest;
}
/*
 * Stops every worker thread and releases its thread and semaphore handles.
 * Slots whose handles are NULL are skipped. Always returns 0.
 */
int stopCopyThreads()
{
    for (int ctr = 0; ctr < NUM_CPY_THREADS; ctr++)
    {
        if (hCopyThreads[ctr] != NULL)
        {
            /* Terminate first, so the worker is gone before its semaphores are closed. */
            TerminateThread(hCopyThreads[ctr], 0);
            /* Terminating does not release the handle; close it to avoid a leak. */
            CloseHandle(hCopyThreads[ctr]);
            hCopyThreads[ctr] = NULL;
        }
        if (hCopyStartSemaphores[ctr] != NULL)
        {
            CloseHandle(hCopyStartSemaphores[ctr]);
            hCopyStartSemaphores[ctr] = NULL;
        }
        if (hCopyStopSemaphores[ctr] != NULL)
        {
            CloseHandle(hCopyStopSemaphores[ctr]);
            hCopyStopSemaphores[ctr] = NULL;
        }
    }
    return 0;
}
这在很大程度上取决于架构和操作系统。
使用一个处理器:
如果您使用线程来执行 memcpy
而机器只有一个核心,那么不会有任何加速。原因是,在同一个处理器上运行的所有线程之间会发生上下文切换,与不使用线程直接调用 memcpy 相比,这会带来额外开销。
使用多核:
在这种情况下,结果还取决于内核是否把你的线程映射到不同的处理器上,因为这些线程是用户级线程。如果你的线程在不同的处理器上并发运行,并且内存支持双端口访问,你可能会看到加速。如果内存只有单端口访问,我不确定是否会有改进。
I am working on an optimization task for the memcpy function, and I found this link: How to increase performance of memcpy
Since I'm not familiar with multithreaded programming, I don't know how to integrate the code below into the original main function. How can the code from the original question be turned into a multithreaded memcpy project? In other words, how do I build a complete project around this multithreaded memcpy, and where in the original main function should the calls to startCopyThreads, stopCopyThreads, and mt_memcpy go?
/* Number of worker threads that each copy is split across. */
#define NUM_CPY_THREADS 4

/* Handle of each worker thread. */
HANDLE hCopyThreads[NUM_CPY_THREADS] = { NULL };
/* Per-thread semaphore that is released to start a copy. */
HANDLE hCopyStartSemaphores[NUM_CPY_THREADS] = { NULL };
/* Per-thread semaphore that the worker releases once its copy is done. */
HANDLE hCopyStopSemaphores[NUM_CPY_THREADS] = { NULL };
/* Work description handed to one copy thread. */
typedef struct
{
    int ct;       /* index of the thread; selects its start/stop semaphores */
    void *src;    /* start of the source slice */
    void *dest;   /* start of the destination slice */
    size_t size;  /* number of bytes in the slice */
} mt_cpy_t;

/* One parameter slot per copy thread. */
mt_cpy_t mtParamters[NUM_CPY_THREADS] = {0};
/*
 * Worker thread entry point.
 *
 * param points at this thread's mt_cpy_t slot. The thread waits on its
 * start semaphore, copies the slice described by the slot, releases its
 * stop semaphore, and then waits again.
 *
 * Returns 0 once a wait on the start semaphore does not succeed.
 */
DWORD WINAPI thread_copy_proc(LPVOID param)
{
    mt_cpy_t *p = (mt_cpy_t *) param;

    /*
     * Copy only when the wait was actually satisfied. With the result
     * ignored, a failing wait (e.g. an invalid semaphore handle) made the
     * loop spin forever, re-running memcpy on stale parameters.
     */
    while (WaitForSingleObject(hCopyStartSemaphores[p->ct], INFINITE) == WAIT_OBJECT_0)
    {
        memcpy(p->dest, p->src, p->size);
        ReleaseSemaphore(hCopyStopSemaphores[p->ct], 1, NULL);
    }
    return 0;
}
/*
 * Creates the start/stop semaphores and the worker thread for every slot.
 *
 * Returns 0 when all NUM_CPY_THREADS workers were created, or -1 as soon as
 * a semaphore or thread cannot be created. On failure, the handles created
 * so far are left in the global arrays.
 */
int startCopyThreads()
{
    for (int ctr = 0; ctr < NUM_CPY_THREADS; ctr++)
    {
        /* Initial count 0: the worker blocks until a copy is requested. */
        hCopyStartSemaphores[ctr] = CreateSemaphore(NULL, 0, 1, NULL);
        hCopyStopSemaphores[ctr] = CreateSemaphore(NULL, 0, 1, NULL);
        if (hCopyStartSemaphores[ctr] == NULL || hCopyStopSemaphores[ctr] == NULL)
        {
            /* Do not start a worker that would wait on a missing semaphore. */
            return -1;
        }

        mtParamters[ctr].ct = ctr;
        hCopyThreads[ctr] = CreateThread(0, 0, thread_copy_proc, &mtParamters[ctr], 0, NULL);
        if (hCopyThreads[ctr] == NULL)
        {
            return -1;
        }
    }
    return 0;
}
/*
 * Copies bytes bytes from src to dest by splitting the range into
 * NUM_CPY_THREADS contiguous slices, one per worker thread.
 *
 * The semaphores and worker threads set up by startCopyThreads must exist.
 * The slice descriptions are written into the global mtParamters slots.
 *
 * Returns dest.
 */
void * mt_memcpy(void * dest, void * src, size_t bytes)
{
    /*
     * Slice boundary i is floor(i * bytes / N). It is computed from the
     * quotient and remainder so that the product cannot overflow size_t
     * for very large copies.
     */
    const size_t quot = bytes / NUM_CPY_THREADS;
    const size_t rem = bytes % NUM_CPY_THREADS;
    size_t begin = 0;

    /* set up parameters */
    for (int ctr = 0; ctr < NUM_CPY_THREADS; ctr++)
    {
        size_t end = (size_t)(ctr + 1) * quot + (size_t)(ctr + 1) * rem / NUM_CPY_THREADS;

        mtParamters[ctr].dest = (char *) dest + begin;
        mtParamters[ctr].src = (char *) src + begin;
        mtParamters[ctr].size = end - begin;
        begin = end;
    }

    /* release semaphores to start computation */
    for (int ctr = 0; ctr < NUM_CPY_THREADS; ctr++)
    {
        ReleaseSemaphore(hCopyStartSemaphores[ctr], 1, NULL);
    }

    /* wait for all threads to finish */
    WaitForMultipleObjects(NUM_CPY_THREADS, hCopyStopSemaphores, TRUE, INFINITE);

    return dest;
}
/*
 * Stops every worker thread and releases its thread and semaphore handles.
 * Slots whose handles are NULL are skipped. Always returns 0.
 */
int stopCopyThreads()
{
    for (int ctr = 0; ctr < NUM_CPY_THREADS; ctr++)
    {
        if (hCopyThreads[ctr] != NULL)
        {
            /* Terminate first, so the worker is gone before its semaphores are closed. */
            TerminateThread(hCopyThreads[ctr], 0);
            /* Terminating does not release the handle; close it to avoid a leak. */
            CloseHandle(hCopyThreads[ctr]);
            hCopyThreads[ctr] = NULL;
        }
        if (hCopyStartSemaphores[ctr] != NULL)
        {
            CloseHandle(hCopyStartSemaphores[ctr]);
            hCopyStartSemaphores[ctr] = NULL;
        }
        if (hCopyStopSemaphores[ctr] != NULL)
        {
            CloseHandle(hCopyStopSemaphores[ctr]);
            hCopyStopSemaphores[ctr] = NULL;
        }
    }
    return 0;
}
It depends a lot on the architecture and OS.
With one processor:
If you are using threads for memcpy
on a machine with only one core, then it will not show any speedup. The reason is that all threads running on one processor incur context switching, which is overhead compared to using memcpy without threads.
With multicore:
In this case, it also depends on the kernel: specifically, whether it maps your threads onto different processors, since these threads are user-level threads. If your threads run concurrently on different processors, you may see a speedup, provided the memory has dual-port access. With single-port access, I am not sure whether there will be any improvement.
这篇关于memcpy的多线程编程的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!