在4.1.2和更早版本中实现GCC cas功能 [英] achieve GCC cas function for version 4.1.2 and earlier
问题描述
我的新公司项目,他们希望代码运行在32位,编译服务器是带有 GCC 4.1.1 的 CentOS 5.0 .
在项目中使用的功能很多,例如__sync_fetch_and_add
在 GCC 4.1.2 及更高版本中给出.
My new company project, they want the code run for the 32-bit, the compile server is a CentOS 5.0 with GCC 4.1.1, that was the nightmare.
There are lots of functions using in the project like __sync_fetch_and_add
was given in GCC 4.1.2 and later.
有人告诉我无法升级GCC版本,因此在谷歌搜索了几个小时后,我不得不提出另一个解决方案.
I was told can not upgrade GCC version, so I have to make another solution after Googling for several hours.
当我编写演示进行测试时,我得到的答案是错误的,代码打击想替换功能__sync_fetch_and_add
When I wrote a demo to test, I just got the wrong answer, the code blow want to replace function __sync_fetch_and_add
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>
static int count = 0;
int compare_and_swap(int* reg, int oldval, int newval)
{
register char result;
#ifdef __i386__
__asm__ volatile ("lock; cmpxchgl %3, %0; setz %1"
: "=m"(*reg), "=q" (result)
: "m" (*reg), "r" (newval), "a" (oldval)
: "memory");
return result;
#elif defined(__x86_64__)
__asm__ volatile ("lock; cmpxchgq %3, %0; setz %1"
: "=m"(*reg), "=q" (result)
: "m" (*reg), "r" (newval), "a" (oldval)
: "memory");
return result;
#else
#error:architecture not supported and gcc too old
#endif
}
void *test_func(void *arg)
{
int i = 0;
for(i = 0; i < 2000; ++i) {
compare_and_swap((int *)&count, count, count + 1);
}
return NULL;
}
int main(int argc, const char *argv[])
{
pthread_t id[10];
int i = 0;
for(i = 0; i < 10; ++i){
pthread_create(&id[i], NULL, test_func, NULL);
}
for(i = 0; i < 10; ++i) {
pthread_join(id[i], NULL);
}
//10*2000=20000
printf("%d\n", count);
return 0;
}
我什么时候得到了错误的结果:
Whent I got the wrong result:
[root@centos-linux-7 workspace]# ./asm
17123
[root@centos-linux-7 workspace]# ./asm
14670
[root@centos-linux-7 workspace]# ./asm
14604
[root@centos-linux-7 workspace]# ./asm
13837
[root@centos-linux-7 workspace]# ./asm
14043
[root@centos-linux-7 workspace]# ./asm
16160
[root@centos-linux-7 workspace]# ./asm
15271
[root@centos-linux-7 workspace]# ./asm
15280
[root@centos-linux-7 workspace]# ./asm
15465
[root@centos-linux-7 workspace]# ./asm
16673
我在这行中意识到
compare_and_swap((int *)&count, count, count + 1);
count + 1
是错的!
然后我该如何实现与__sync_fetch_and_add
相同的功能. compare_and_swap
函数在第三个参数恒定时起作用.
Then how can I implement the same function as __sync_fetch_and_add
. The compare_and_swap
function works when the third parameter is constant.
顺便说一句,compare_and_swap
函数对吗?我只是为此搜索了Google,不熟悉汇编程序.
By the way, compare_and_swap
function is that right? I just Googled for that, not familiar with assembly.
我对这个问题感到绝望.
I got despair with this question.
…………………………………………………………………………………………………… ………………………………
………………………………………………………………………………………………………………………………………………………………………………………………………………………
看到下面的答案后,我用了一会儿就得到了正确的答案,但似乎更加困惑. 这是代码:
after seeing the answer below,I use while and got the right answer,but seems confuse more. here is the code:
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>
static unsigned long count = 0;
int sync_add_and_fetch(int* reg, int oldval, int incre)
{
register char result;
#ifdef __i386__
__asm__ volatile ("lock; cmpxchgl %3, %0; setz %1" : "=m"(*reg), "=q" (result) : "m" (*reg), "r" (oldval + incre), "a" (oldval) : "memory");
return result;
#elif defined(__x86_64__)
__asm__ volatile ("lock; cmpxchgq %3, %0; setz %1" : "=m"(*reg), "=q" (result) : "m" (*reg), "r" (newval + incre), "a" (oldval) : "memory");
return result;
#else
#error:architecture not supported and gcc too old
#endif
}
void *test_func(void *arg)
{
int i=0;
int result = 0;
for(i=0;i<2000;++i)
{
result = 0;
while(0 == result)
{
result = sync_add_and_fetch((int *)&count, count, 1);
}
}
return NULL;
}
int main(int argc, const char *argv[])
{
pthread_t id[10];
int i = 0;
for(i=0;i<10;++i){
pthread_create(&id[i],NULL,test_func,NULL);
}
for(i=0;i<10;++i){
pthread_join(id[i],NULL);
}
//10*2000=20000
printf("%u\n",count);
return 0;
}
答案恰好是20000,所以我认为当您使用sync_add_and_fetch函数时,应该使用while循环是愚蠢的,所以我这样写:
the answer goes right to 20000,so i think when you use sync_add_and_fetch function,you should goes with a while loop is stupid,so I write like this:
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>
static unsigned long count = 0;
int compare_and_swap(int* reg, int oldval, int incre)
{
register char result;
#ifdef __i386__
__asm__ volatile ("lock; cmpxchgl %3, %0; setz %1" : "=m"(*reg), "=q" (result) : "m" (*reg), "r" (oldval + incre), "a" (oldval) : "memory");
return result;
#elif defined(__x86_64__)
__asm__ volatile ("lock; cmpxchgq %3, %0; setz %1" : "=m"(*reg), "=q" (result) : "m" (*reg), "r" (newval + incre), "a" (oldval) : "memory");
return result;
#else
#error:architecture not supported and gcc too old
#endif
}
void sync_add_and_fetch(int *reg,int oldval,int incre)
{
int ret = 0;
while(0 == ret)
{
ret = compare_and_swap(reg,oldval,incre);
}
}
void *test_func(void *arg)
{
int i=0;
for(i=0;i<2000;++i)
{
sync_add_and_fetch((int *)&count, count, 1);
}
return NULL;
}
int main(int argc, const char *argv[])
{
pthread_t id[10];
int i = 0;
for(i=0;i<10;++i){
pthread_create(&id[i],NULL,test_func,NULL);
}
for(i=0;i<10;++i){
pthread_join(id[i],NULL);
}
//10*2000=20000
printf("%u\n",count);
return 0;
}
但是当我在g ++ -g -o asm asm.cpp -lpthread之后用./asm运行此代码时,asm停留了5分钟以上,请参见另一个终端的顶部:
but when i run this code with ./asm after g++ -g -o asm asm.cpp -lpthread.the asm just stuck for more than 5min,see top in another terminal:
3861根19 0 102m 888732 S 400 0.0 2:51.06 asm
3861 root 19 0 102m 888 732 S 400 0.0 2:51.06 asm
我只是感到困惑,这段代码不一样吗?
I just confused,is this code not the same?
推荐答案
如果您确实处于这种困境中,那么我将从以下头文件开始:
If you truly are in such a predicament, I would start with the following header file:
#ifndef SYNC_H
#define SYNC_H
#if defined(__x86_64__) || defined(__i386__)
static inline int sync_val_compare_and_swap_int(int *ptr, int oldval, int newval)
{
__asm__ __volatile__( "lock cmpxchgl %[newval], %[ptr]"
: "+a" (oldval), [ptr] "+m" (*ptr)
: [newval] "r" (newval)
: "memory" );
return oldval;
}
static inline int sync_fetch_and_add_int(int *ptr, int val)
{
__asm__ __volatile__( "lock xaddl %[val], %[ptr]"
: [val] "+r" (val), [ptr] "+m" (*ptr)
:
: "memory" );
return val;
}
static inline int sync_add_and_fetch_int(int *ptr, int val)
{
const int old = val;
__asm__ __volatile__( "lock xaddl %[val], %[ptr]"
: [val] "+r" (val), [ptr] "+m" (*ptr)
:
: "memory" );
return old + val;
}
static inline int sync_fetch_and_sub_int(int *ptr, int val) { return sync_fetch_and_add_int(ptr, -val); }
static inline int sync_sub_and_fetch_int(int *ptr, int val) { return sync_add_and_fetch_int(ptr, -val); }
/* Memory barrier */
static inline void sync_synchronize(void) { __asm__ __volatile__( "mfence" ::: "memory"); }
#else
#error Unsupported architecture.
#endif
#endif /* SYNC_H */
相同的扩展内联程序集可同时用于x86和x86-64.仅实现int
类型,并且您确实需要用sync_synchronize()
替换可能的__sync_synchronize()
调用,并用sync_..._int()
替换每个__sync_...()
调用.
The same extended inline assembly works for both x86 and x86-64. Only the int
type is implemented, and you do need to replace possible __sync_synchronize()
calls with sync_synchronize()
, and each __sync_...()
call with sync_..._int()
.
要进行测试,您可以使用例如
To test, you can use e.g.
#include <stdlib.h>
#include <pthread.h>
#include <string.h>
#include <errno.h>
#include <stdio.h>
#include "sync.h"
#define THREADS 16
#define PERTHREAD 8000
void *test_func1(void *sumptr)
{
int *const sum = sumptr;
int n = PERTHREAD;
while (n-->0)
sync_add_and_fetch_int(sum, n + 1);
return NULL;
}
void *test_func2(void *sumptr)
{
int *const sum = sumptr;
int n = PERTHREAD;
while (n-->0)
sync_fetch_and_add_int(sum, n + 1);
return NULL;
}
void *test_func3(void *sumptr)
{
int *const sum = sumptr;
int n = PERTHREAD;
int oldval, curval, newval;
while (n-->0) {
curval = *sum;
do {
oldval = curval;
newval = curval + n + 1;
} while ((curval = sync_val_compare_and_swap_int(sum, oldval, newval)) != oldval);
}
return NULL;
}
static void *(*worker[3])(void *) = { test_func1, test_func2, test_func3 };
int main(void)
{
pthread_t thread[THREADS];
pthread_attr_t attrs;
int sum = 0;
int t, result;
pthread_attr_init(&attrs);
pthread_attr_setstacksize(&attrs, 65536);
for (t = 0; t < THREADS; t++) {
result = pthread_create(thread + t, &attrs, worker[t % 3], &sum);
if (result) {
fprintf(stderr, "Failed to create thread %d of %d: %s.\n", t+1, THREADS, strerror(errno));
exit(EXIT_FAILURE);
}
}
pthread_attr_destroy(&attrs);
for (t = 0; t < THREADS; t++)
pthread_join(thread[t], NULL);
t = THREADS * PERTHREAD * (PERTHREAD + 1) / 2;
if (sum == t)
printf("sum = %d (as expected)\n", sum);
else
printf("sum = %d (expected %d)\n", sum, t);
return EXIT_SUCCESS;
}
不幸的是,我没有要测试的旧版GCC,因此仅在Linux上针对x86和x86-64(使用-O2
)使用GCC 5.4.0和GCC-4.9.3进行了测试.
Unfortunately, I don't have an ancient version of GCC to test, so this has only been tested with GCC 5.4.0 and GCC-4.9.3 for x86 and x86-64 (using -O2
) on Linux.
如果您发现上述任何错误或问题,请在评论中告知我,以便我根据需要进行验证和修复.
If you find any bugs or issues in the above, please let me know in a comment so I can verify and fix as needed.
这篇关于在4.1.2和更早版本中实现GCC cas功能的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!