在快速排序表现不佳,双轴,许多平等键 [英] Poor performance on Quicksort, Dual Pivot, Many equal keys

查看:395
本文介绍了在快速排序表现不佳,双轴,许多平等键的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我有一个快速排序的关于双枢轴战略的性能​​问题。

情况:

以数组的 n个随机生成的32位整数,按升序带来这些。让我们修复 N = 10_000_000 并让生成随机数[0,MAX_RAND] 。在这里,一切正常 - 我实现的性能相媲美的的qsort之一()库实现

现在的问题:

采取同样的 N 同上,但限制的随机号码INTERVALL到 [0,K],其中K< 〜5000 ,在性能大幅提升分解。从我的机器的一些数字:

K = MAX_RAND结果
QS-DUAL-PIVOT时间:865ms结果
升序:1结果
QS_LIBRARY时间:1296ms结果
升序:1

K = 10000结果
QS-DUAL-PIVOT时间:2521ms结果
升序:1结果
QS_LIBRARY时间:1076ms结果
升序:1

K = 5000结果
QS-DUAL-PIVOT时间:4420ms结果
升序:1结果
QS_LIBRARY时间:1044ms结果
升序:1

我想,你的想法。它编译我的私人英特尔的机器上用gcc-5.2下拱的Linux-64 -O9。

你对我有什么提示吗?请告诉我这里的问题?

举例code,产生上述输出:

 的#include<&stdio.h中GT;
#包括LT&;&stdlib.h中GT;
#包括LT&;&time.h中GT;INT is_ascending为(int *改编,诠释大小)
{
    INT I;    对于(i = 0; I<大小 - 1;我++)
        如果(改编[Ⅰ]≥常用3第[i + 1])
            返回0;    返回1;
}
无效quick_sort_dual_pivot为(int *改编,诠释低,诠释高)
{
    INT LT,HT,我,温度;    如果(低> =高)
        返回;    LT =低+ 1;
    HT =高 - 1;    如果(ARR [低]>常用3 [高])
        {
        TEMP = ARR [低]
        ARR [低] = ARR [高];
        ARR [高] =温度;
        }    如果(低+ 1 ==高)
        返回;    对于(i =低+ 1; I< =高; ++ I)
        {
        如果(ⅰ> HT)
            打破;        如果(ARR [1] - ,常用3 [小])
            {
            TEMP = ARR [LT]
            ARR [LT] =改编[I]
            改编[I] =温度;
            ++ LT;
            }
        否则,如果(ARR [I]>常用3 [高])
            {
            TEMP = ARR [HT]
            ARR [HT] =改编[I]
            改编[I] =温度;
            --ht;
             - 一世;
            }
        }    ++ HT;
    TEMP = ARR [HT]
    ARR [HT] = ARR [高];
    ARR [高] =温度;    --lt;
    TEMP = ARR [LT]
    ARR [LT] =改编[小]
    ARR [低] =温度;    quick_sort_dual_pivot(ARR,低,LT - 1);
    quick_sort_dual_pivot(ARR,LT + 1,HT - 1);
    quick_sort_dual_pivot(ARR,HT + 1,高);
}INT get_ms(INT启动)
{
    静态timeval结构START_TIME;
           timeval结构END_TIME;
    诠释毫秒,秒,​​useconds;    如果(开始)
        {
        函数gettimeofday(安培; START_TIME,NULL);        返回0;
        }    函数gettimeofday(安培; END_TIME,NULL);    秒= end_time.tv_sec - start_time.tv_sec;
    useconds = end_time.tv_usec - start_time.tv_usec;    毫秒=((秒)* 1000 + useconds / 1000)+ 0.5;    返回毫秒;
}
INT comp_asc(常量无效* ARG1,常量无效* ARG2)
{
     回报(*为(int *)ARG1) - (*为(int *)ARG2);
}
#定义ARRAY_SIZE千万INT主要(无效)
{
   INT I;
   INT毫秒;
   静态INT A1 [ARRAY_SIZE]
   静态INT A2 [ARRAY_SIZE]   函数srand(时间(NULL));   对于(i = 0; I< ARRAY_SIZE;我++)
       {
       INT R =兰特()%5000;
       A1 [I] = R;
       A2 [I] = R;
       }   get_ms(1);
   quick_sort_dual_pivot(A1,0,ARRAY_SIZE - 1);
   毫秒= get_ms(0);
   的printf(QS-DUAL-PIVOT时间:%DMS的\\ n,毫秒);
   的printf(升序数:%d \\ n,is_ascending(A1,ARRAY_SIZE));
   get_ms(1);
   的qsort(A2,ARRAY_SIZE,sizeof的(INT),comp_asc);
   毫秒= get_ms(0);
   的printf(QS_LIBRARY时间:%DMS的\\ n,毫秒);
   的printf(升序数:%d \\ n,is_ascending(A2,ARRAY_SIZE));   返回0;
}


解决方案

结果是惊人的!我还增加了插入排序的扭曲和现在看到的结果如下:

  INT R = RAND();


  

QS-DUAL-PIVOT时间:875ms结果
  QS-DUAL-PIVOT2时间:847ms结果
  QS-LIBRARY时间:1289ms


  INT R =兰特()%10000;


  

QS-DUAL-PIVOT时间:2511ms结果
  QS-DUAL-PIVOT2时间:513ms结果
  QS-LIBRARY时间:1080ms


  INT R =兰特()%5000;


  

QS-DUAL-PIVOT时间:4399ms结果
  QS-DUAL-PIVOT2时间:475ms结果
  QS-LIBRARY时间:1043ms


当然,与库函数比较不此刻准确的,因为我有概括的数据类型为我实现。但尽管如此,这些结果是比较看好的。

I have an issue with the performance of quicksort concerning the dual pivoting strategy.

The situation:

Take an array a of n randomly generated 32-bit integers and bring these in ascending order. Lets fix n = 10_000_000 and let the random numbers be generated in [0,MAX_RAND]. Here everything works as expected - the performance of my implementation is comparable the one of the qsort() library implementation.

Now the problem:

Taking the same n as above but restricting the intervall for the random-numbers to [0,K] where K < ~5000, the performance breaks down dramatically. Some numbers from my machine:

K = MAX_RAND
QS-DUAL-PIVOT Time: 865ms
Ascending: 1
QS_LIBRARY Time: 1296ms
Ascending: 1

K = 10000
QS-DUAL-PIVOT Time: 2521ms
Ascending: 1
QS_LIBRARY Time: 1076ms
Ascending: 1

K = 5000
QS-DUAL-PIVOT Time: 4420ms
Ascending: 1
QS_LIBRARY Time: 1044ms
Ascending: 1

I think, you get the idea. Its compiled on my private intel machine with gcc-5.2 under Arch-Linux-64 with -O9.

Do you have any hints for me? Whats the problem here?

Example code to generate the above outputs:

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

int is_ascending (int *arr, int size)
{
    int i;

    for (i = 0; i < size - 1; i ++)
        if (arr[i] > arr[i + 1])
            return 0;

    return 1;
}


void quick_sort_dual_pivot (int *arr, int low, int high)
{
    int lt, ht, i, temp;

    if (low >= high)
        return;

    lt = low + 1;
    ht = high - 1;

    if (arr[low] > arr[high])
        {
        temp      = arr[low];
        arr[low]  = arr[high];
        arr[high] = temp;
        }

    if (low + 1 == high)
        return;

    for (i = low + 1; i <= high; ++i)
        {
        if (i > ht)
            break;

        if (arr[i] < arr[low])
            {
            temp = arr[lt];
            arr[lt] = arr[i];
            arr[i] = temp;
            ++lt;
            }
        else if (arr[i] > arr[high])
            {
            temp = arr[ht];
            arr[ht] = arr[i];
            arr[i] = temp;
            --ht;
            --i;
            }
        }

    ++ht;
    temp      = arr[ht];
    arr[ht]   = arr[high];
    arr[high] = temp;

    --lt;
    temp     = arr[lt];
    arr[lt]  = arr[low];
    arr[low] = temp;

    quick_sort_dual_pivot (arr, low, lt - 1);
    quick_sort_dual_pivot (arr, lt + 1, ht - 1);
    quick_sort_dual_pivot (arr, ht + 1, high);
}

int get_ms (int start)
{
    static struct timeval start_time;
           struct timeval end_time;
    int msec, seconds, useconds;

    if (start)
        {
        gettimeofday (&start_time, NULL);

        return 0;
        }

    gettimeofday (&end_time, NULL);

    seconds  = end_time.tv_sec  - start_time.tv_sec;
    useconds = end_time.tv_usec - start_time.tv_usec;

    msec = ((seconds) * 1000 + useconds / 1000.) + 0.5;

    return msec;
}


int comp_asc (const void *arg1, const void *arg2)
{
     return (*(int *)arg1) - (*(int *)arg2);
}


#define ARRAY_SIZE 10000000

int main(void)
{
   int i;
   int millisec;
   static int a1[ARRAY_SIZE];
   static int a2[ARRAY_SIZE];

   srand (time (NULL));

   for (i = 0; i < ARRAY_SIZE; i ++)
       {
       int r = rand () % 5000;
       a1[i] = r;
       a2[i] = r;
       }

   get_ms (1);
   quick_sort_dual_pivot (a1, 0, ARRAY_SIZE - 1);
   millisec = get_ms (0);
   printf ("QS-DUAL-PIVOT Time: %dms\n", millisec);
   printf ("Ascending:          %d\n", is_ascending (a1, ARRAY_SIZE));
   get_ms (1);
   qsort (a2, ARRAY_SIZE, sizeof (int), comp_asc);
   millisec = get_ms (0);
   printf ("QS_LIBRARY Time:    %dms\n", millisec);
   printf ("Ascending:          %d\n", is_ascending (a2, ARRAY_SIZE));

   return 0;
}

解决方案

The results are amazing! I also added the insertion-sort twist and now see the following results:

int r = rand ();

QS-DUAL-PIVOT Time: 875ms
QS-DUAL-PIVOT2 Time: 847ms
QS-LIBRARY Time: 1289ms

int r = rand () % 10000;

QS-DUAL-PIVOT Time: 2511ms
QS-DUAL-PIVOT2 Time: 513ms
QS-LIBRARY Time: 1080ms

int r = rand () % 5000;

QS-DUAL-PIVOT Time: 4399ms
QS-DUAL-PIVOT2 Time: 475ms
QS-LIBRARY Time: 1043ms

Of course, the comparison with the library function is not accurate at the moment, because I have to generalize the datatype for my implementation. But still, these results are quite promising.

这篇关于在快速排序表现不佳,双轴,许多平等键的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆