在4.1.2和更早版本中实现GCC cas功能 [英] achieve GCC cas function for version 4.1.2 and earlier

查看:93
本文介绍了在4.1.2和更早版本中实现GCC cas功能的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我的新公司项目,他们希望代码运行在32位,编译服务器是带有 GCC 4.1.1 CentOS 5.0 .
在项目中使用的功能很多,例如__sync_fetch_and_add GCC 4.1.2 及更高版本中给出.

My new company project, they want the code run for the 32-bit, the compile server is a CentOS 5.0 with GCC 4.1.1, that was the nightmare.
There are lots of functions using in the project like __sync_fetch_and_add was given in GCC 4.1.2 and later.

有人告诉我无法升级GCC版本,因此在谷歌搜索了几个小时后,我不得不提出另一个解决方案.

I was told can not upgrade GCC version, so I have to make another solution after Googling for several hours.

当我编写演示进行测试时,我得到的答案是错误的,代码打击想替换功能__sync_fetch_and_add

When I wrote a demo to test, I just got the wrong answer, the code blow want to replace function __sync_fetch_and_add

#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>

static int count = 0;

int compare_and_swap(int* reg, int oldval, int newval) 
{
    register char result;
#ifdef __i386__
    __asm__ volatile ("lock; cmpxchgl %3, %0; setz %1" 
                     : "=m"(*reg), "=q" (result) 
                     : "m" (*reg), "r" (newval), "a" (oldval) 
                     : "memory");
    return result;
#elif defined(__x86_64__)
    __asm__ volatile ("lock; cmpxchgq %3, %0; setz %1" 
                     : "=m"(*reg), "=q" (result) 
                     : "m" (*reg), "r" (newval), "a" (oldval) 
                     : "memory");
    return result;
#else
    #error:architecture not supported and gcc too old
#endif

}

void *test_func(void *arg)
{
    int i = 0;
    for(i = 0; i < 2000; ++i) {
        compare_and_swap((int *)&count, count, count + 1);
    }

    return NULL;
}

int main(int argc, const char *argv[])
{
    pthread_t id[10];
    int i = 0;

    for(i = 0; i < 10; ++i){
        pthread_create(&id[i], NULL, test_func, NULL);
    }

    for(i = 0; i < 10; ++i) {
        pthread_join(id[i], NULL);
    }
    //10*2000=20000
    printf("%d\n", count);

    return 0;
}

我什么时候得到了错误的结果:

Whent I got the wrong result:

[root@centos-linux-7 workspace]# ./asm
17123
[root@centos-linux-7 workspace]# ./asm
14670
[root@centos-linux-7 workspace]# ./asm
14604
[root@centos-linux-7 workspace]# ./asm
13837
[root@centos-linux-7 workspace]# ./asm
14043
[root@centos-linux-7 workspace]# ./asm
16160
[root@centos-linux-7 workspace]# ./asm
15271
[root@centos-linux-7 workspace]# ./asm
15280
[root@centos-linux-7 workspace]# ./asm
15465
[root@centos-linux-7 workspace]# ./asm
16673

我在这行中意识到

compare_and_swap((int *)&count, count, count + 1); 

count + 1是错的!

然后我该如何实现与__sync_fetch_and_add相同的功能. compare_and_swap函数在第三个参数恒定时起作用.

Then how can I implement the same function as __sync_fetch_and_add. The compare_and_swap function works when the third parameter is constant.

顺便说一句,compare_and_swap函数对吗?我只是为此搜索了Google,不熟悉汇编程序.

By the way, compare_and_swap function is that right? I just Googled for that, not familiar with assembly.

我对这个问题感到绝望.

I got despair with this question.

…………………………………………………………………………………………………… ………………………………

………………………………………………………………………………………………………………………………………………………………………………………………………………………

看到下面的答案后,我用了一会儿就得到了正确的答案,但似乎更加困惑. 这是代码:

after seeing the answer below,I use while and got the right answer,but seems confuse more. here is the code:

#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>

static unsigned long  count = 0;

int sync_add_and_fetch(int* reg, int oldval, int incre) 
{
    register char result;
#ifdef __i386__
    __asm__ volatile ("lock; cmpxchgl %3, %0; setz %1" : "=m"(*reg), "=q" (result) : "m" (*reg), "r" (oldval + incre), "a" (oldval) : "memory");
    return result;
#elif defined(__x86_64__)
    __asm__ volatile ("lock; cmpxchgq %3, %0; setz %1" : "=m"(*reg), "=q" (result) : "m" (*reg), "r" (newval + incre), "a" (oldval) : "memory");
    return result;
#else
    #error:architecture not supported and gcc too old
#endif

}


void *test_func(void *arg)
{
    int i=0;
    int result = 0;
    for(i=0;i<2000;++i)
    {
        result = 0;
        while(0 == result)
        {
            result = sync_add_and_fetch((int *)&count, count, 1);
        }
    }

    return NULL;
}

int main(int argc, const char *argv[])
{
    pthread_t id[10];
    int i = 0;

    for(i=0;i<10;++i){
        pthread_create(&id[i],NULL,test_func,NULL);
    }

    for(i=0;i<10;++i){
        pthread_join(id[i],NULL);
    }
    //10*2000=20000
    printf("%u\n",count);

    return 0;
}

答案恰好是20000,所以我认为当您使用sync_add_and_fetch函数时,应该使用while循环是愚蠢的,所以我这样写:

the answer goes right to 20000,so i think when you use sync_add_and_fetch function,you should goes with a while loop is stupid,so I write like this:

#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>

static unsigned long  count = 0;

int compare_and_swap(int* reg, int oldval, int incre) 
{
    register char result;
#ifdef __i386__
    __asm__ volatile ("lock; cmpxchgl %3, %0; setz %1" : "=m"(*reg), "=q" (result) : "m" (*reg), "r" (oldval + incre), "a" (oldval) : "memory");
    return result;
#elif defined(__x86_64__)
    __asm__ volatile ("lock; cmpxchgq %3, %0; setz %1" : "=m"(*reg), "=q" (result) : "m" (*reg), "r" (newval + incre), "a" (oldval) : "memory");
    return result;
#else
    #error:architecture not supported and gcc too old
#endif

}

void sync_add_and_fetch(int *reg,int oldval,int incre)
{
    int ret = 0;
    while(0 == ret)
    {
       ret = compare_and_swap(reg,oldval,incre);
    }
}

void *test_func(void *arg)
{
    int i=0;
    for(i=0;i<2000;++i)
    {
        sync_add_and_fetch((int *)&count, count, 1);
    }

    return NULL;
}

int main(int argc, const char *argv[])
{
    pthread_t id[10];
    int i = 0;

    for(i=0;i<10;++i){
        pthread_create(&id[i],NULL,test_func,NULL);
    }

    for(i=0;i<10;++i){
        pthread_join(id[i],NULL);
    }
    //10*2000=20000
    printf("%u\n",count);

    return 0;
}

但是当我在g ++ -g -o asm asm.cpp -lpthread之后用./asm运行此代码时,asm停留了5分钟以上,请参见另一个终端的顶部:

but when i run this code with ./asm after g++ -g -o asm asm.cpp -lpthread.the asm just stuck for more than 5min,see top in another terminal:

3861根19 0 102m 888732 S 400 0.0 2:51.06 asm

3861 root 19 0 102m 888 732 S 400 0.0 2:51.06 asm

我只是感到困惑,这段代码不一样吗?

I just confused,is this code not the same?

推荐答案

如果您确实处于这种困境中,那么我将从以下头文件开始:

If you truly are in such a predicament, I would start with the following header file:

#ifndef   SYNC_H
#define   SYNC_H
#if defined(__x86_64__) || defined(__i386__)

static inline int  sync_val_compare_and_swap_int(int *ptr, int oldval, int newval)
{
    __asm__ __volatile__( "lock cmpxchgl %[newval], %[ptr]"
                        : "+a" (oldval), [ptr] "+m" (*ptr)
                        : [newval] "r" (newval)
                        : "memory" );
    return oldval;
}

static inline int  sync_fetch_and_add_int(int *ptr, int val)
{
    __asm__ __volatile__( "lock xaddl %[val], %[ptr]"
                        : [val] "+r" (val), [ptr] "+m" (*ptr)
                        :
                        : "memory" );
    return val;
}


static inline int  sync_add_and_fetch_int(int *ptr, int val)
{
    const int  old = val;
    __asm__ __volatile__( "lock xaddl %[val], %[ptr]"
                        : [val] "+r" (val), [ptr] "+m" (*ptr)
                        :
                        : "memory" );
    return old + val;
}

static inline int  sync_fetch_and_sub_int(int *ptr, int val) { return sync_fetch_and_add_int(ptr, -val); }
static inline int  sync_sub_and_fetch_int(int *ptr, int val) { return sync_add_and_fetch_int(ptr, -val); }

/* Memory barrier */
static inline void  sync_synchronize(void) { __asm__ __volatile__( "mfence" ::: "memory"); }

#else
#error Unsupported architecture.
#endif
#endif /* SYNC_H */

相同的扩展内联程序集可同时用于x86和x86-64.仅实现int类型,并且您确实需要用sync_synchronize()替换可能的__sync_synchronize()调用,并用sync_..._int()替换每个__sync_...()调用.

The same extended inline assembly works for both x86 and x86-64. Only the int type is implemented, and you do need to replace possible __sync_synchronize() calls with sync_synchronize(), and each __sync_...() call with sync_..._int().

要进行测试,您可以使用例如

To test, you can use e.g.

#include <stdlib.h>
#include <pthread.h>
#include <string.h>
#include <errno.h>
#include <stdio.h>
#include "sync.h"

#define  THREADS   16
#define  PERTHREAD 8000

void *test_func1(void *sumptr)
{
    int *const sum = sumptr;
    int        n = PERTHREAD;
    while (n-->0)
        sync_add_and_fetch_int(sum, n + 1);
    return NULL;
}

void *test_func2(void *sumptr)
{
    int *const sum = sumptr;
    int        n = PERTHREAD;
    while (n-->0)
        sync_fetch_and_add_int(sum, n + 1);
    return NULL;
}

void *test_func3(void *sumptr)
{
    int *const sum = sumptr;
    int        n = PERTHREAD;
    int        oldval, curval, newval;
    while (n-->0) {
        curval = *sum;
        do {
            oldval = curval;
            newval = curval + n + 1;
        } while ((curval = sync_val_compare_and_swap_int(sum, oldval, newval)) != oldval);
    }
    return NULL;
}

static void *(*worker[3])(void *) = { test_func1, test_func2, test_func3 };

int main(void)
{
    pthread_t       thread[THREADS];
    pthread_attr_t  attrs;
    int             sum = 0;
    int             t, result;

    pthread_attr_init(&attrs);
    pthread_attr_setstacksize(&attrs, 65536);
    for (t = 0; t < THREADS; t++) {
        result = pthread_create(thread + t, &attrs, worker[t % 3], &sum);
        if (result) {
            fprintf(stderr, "Failed to create thread %d of %d: %s.\n", t+1, THREADS, strerror(errno));
            exit(EXIT_FAILURE);
        }
    }
    pthread_attr_destroy(&attrs);

    for (t = 0; t < THREADS; t++)
        pthread_join(thread[t], NULL);

    t = THREADS * PERTHREAD * (PERTHREAD + 1) / 2;
    if (sum == t)
        printf("sum = %d (as expected)\n", sum);
    else
        printf("sum = %d (expected %d)\n", sum, t);

    return EXIT_SUCCESS;
}

不幸的是,我没有要测试的旧版GCC,因此仅在Linux上针对x86和x86-64(使用-O2)使用GCC 5.4.0和GCC-4.9.3进行了测试.

Unfortunately, I don't have an ancient version of GCC to test, so this has only been tested with GCC 5.4.0 and GCC-4.9.3 for x86 and x86-64 (using -O2) on Linux.

如果您发现上述任何错误或问题,请在评论中告知我,以便我根据需要进行验证和修复.

If you find any bugs or issues in the above, please let me know in a comment so I can verify and fix as needed.

这篇关于在4.1.2和更早版本中实现GCC cas功能的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆