什么是排序后的数组最快的搜索方法? [英] What is the fastest search method for a sorted array?

查看:136
本文介绍了什么是排序后的数组最快的搜索方法?的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

回答到<一个href="http://stackoverflow.com/questions/4752028/whats-wrong-with-this-interpolation-search-implementation/4752042#4752042">another问题,我写了下面的程序在一个排序的数组比较不同的搜索方法。基本上我比较插值搜索两种实现和二进制搜索之一。我比较了由不同变计数花费(具有相同的一组数据)循环性能

不过,我敢肯定有方法来优化这些功能,使他们更快。有没有人对我怎样才能使这个搜索功能,更快的任何想法?在C或C ++解决方案是可以接受的,但我需要它来处理一个包含有100000元。

 的#include&LT; stdlib.h中&GT;
#包括&LT; stdio.h中&GT;
#包括&LT; time.h中&GT;
#包括&LT; stdint.h&GT;
#包括&LT; ASSERT.H&GT;

静态__inline__无符号长长RDTSC(无效)
{
  无符号得到long long int X;
     __asm​​__挥发性(.BYTE为0x0F,0X31:= A(X));
     返回X;
}

INT interpolationSearch(INT sortedArray [],INT找到相当,诠释的len){
    //返回找到相当的指数sortedArray,或-1,如果未找到
    低的int64_t = 0;
    高的int64_t的len =  -  1;
    中期的int64_t;

    INT L = sortedArray [小]
    INT H = sortedArray [大]

    而(升&其中; =找到相当&安培;&安培h取代; =找到相当){
        中期=低+(的int64_t)((的int64_t)(高 - 低)*(的int64_t)(找到相当 -  1))/((的int64_t)(HL));

        INT M = sortedArray [MID];

        如果(M&LT;找到相当){
            L = sortedArray [低=中等+ 1];
        }否则,如果(M&GT;找到相当){
            H = sortedArray [高=中旬 -  1];
        } 其他 {
            返回中旬;
        }
    }

    如果(sortedArray [小] ==找到相当)
        回报低;
    其他
        返回-1; // 未找到
}

INT interpolationSearch2(INT sortedArray [],INT找到相当,诠释的len){
    //返回找到相当的指数sortedArray,或-1,如果未找到
    INT低= 0;
    INT高= LEN  -  1;
    INT中旬;

    INT L = sortedArray [小]
    INT H = sortedArray [大]

    而(升&其中; =找到相当&安培;&安培h取代; =找到相当){
        中期=低+((浮动)(高 - 低)*(浮动)(找到相当 -  1))/(1+(浮动)(HL));
        INT M = sortedArray [MID];

        如果(M&LT;找到相当){
            L = sortedArray [低=中等+ 1];
        }否则,如果(M&GT;找到相当){
            H = sortedArray [高=中旬 -  1];
        } 其他 {
            返回中旬;
        }
    }

    如果(sortedArray [小] ==找到相当)
        回报低;
    其他
        返回-1; // 未找到
}

INT的binarySearch(INT sortedArray [],INT找到相当,INT LEN)
{
    //返回找到相当的指数sortedArray,或-1,如果未找到
    INT低= 0;
    INT高= LEN  -  1;
    INT中旬;

    INT L = sortedArray [小]
    INT H = sortedArray [大]

    而(升&其中; =找到相当&安培;&安培h取代; =找到相当){
        中期=(低+高)/ 2;

        INT M = sortedArray [MID];

        如果(M&LT;找到相当){
            L = sortedArray [低=中等+ 1];
        }否则,如果(M&GT;找到相当){
            H = sortedArray [高=中旬 -  1];
        } 其他 {
            返回中旬;
        }
    }

    如果(sortedArray [小] ==找到相当)
        回报低;
    其他
        返回-1; // 未找到
}

INT订单(常量无效* P1,常量无效* P2){返回*(INT *)P1  -  *(INT *)P2; }

诠释的主要(无效){
    INT I = 0,J = 0,大小= 100000,试验= 10000;
    INT检索[审判]
    函数srand( - 时间(0));
    为(J = 0; J&LT;试验; J ++){搜索[J] =兰特()%的大小; }

    而(尺寸大于10){
        INT ARR【尺寸】;
        对于(i = 0; I&LT;大小;我++){ARR [I] =兰特()%的大小; }
        的qsort(ARR,大小的sizeof(INT),顺序);

        无符号长长totalcycles_bs = 0;
        无符号长长totalcycles_is_64 = 0;
        无符号长长totalcycles_is_float = 0;
        无符号长长totalcycles_new = 0;
        INT res_bs,res_is_64,res_is_float,res_new;
        为(J = 0; J&LT;试验; J ++){
            无符号长长TMP,周期= RDTSC();
            res_bs =的binarySearch(ARR,搜索[J],大小);
            TMP = RDTSC(); totalcycles_bs + = TMP  - 循环;周期= TMP;

            res_is_64 = interpolationSearch(ARR,搜索[J],大小);
            断言(res_is_64 == res_bs || ARR [res_is_64] ==搜索[J]);
            TMP = RDTSC(); totalcycles_is_64 + = TMP  - 循环;周期= TMP;

            res_is_float = interpolationSearch2(ARR,搜索[J],大小);
            断言(res_is_float == res_bs || ARR [res_is_float] ==​​搜索[J]);
            TMP = RDTSC(); totalcycles_is_float + = TMP  - 循环;周期= TMP;
        }
        的printf(-----------------大小=%10D \ N,大小);
        的printf(二进制搜索=%10llu \ N,totalcycles_bs);
        的printf(插uint64_t中=%10llu \ N,totalcycles_is_64);
        的printf(插浮=%10llu \ N,totalcycles_is_float);
        的printf(新=%10llu \ N,totalcycles_new);
        的printf(\ N);
        大小&GT;&GT; = 1;
    }
}
 

解决方案

如果您通过数据的内存布局一些控制,你可能想看看朱迪阵列。

或者把一个简单的想法在那里:二进制搜索总是削减搜索空间的一半。最佳切割点可以具有内插找到(切点不应当其中键预计的地方,但在接下来的步骤减小了搜索空间的统计期望的点)。这样可以减少步数,但......不是所有的步骤都等成本。分层存储器允许执行多次测试中的同时作为一个单一的测试中,如果可以维持局部性。因为二进制搜索的最先M步骤仅触摸的2 **中号唯一元素的最大值,存储这些一起可以产生一个更好的减少的搜索空间的每超高速缓存行提取(未每比较),这是在现实世界中的性能更高。

正叉树的基础上工作,然后朱迪阵列添加了一些不那么重要的优化。

底线:即使是随机存取存储器(RAM)快时比随机访问的顺序。一个搜索算法,应当使用事实,它的优势。

Answering to another question, I wrote the program below to compare different search methods in a sorted array. Basically I compared two implementations of Interpolation search and one of binary search. I compared performance by counting cycles spent (with the same set of data) by the different variants.

However I'm sure there is ways to optimize these functions to make them even faster. Does anyone have any ideas on how can I make this search function faster? A solution in C or C++ is acceptable, but I need it to process an array with 100000 elements.

#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include <stdint.h>
#include <assert.h>

static __inline__ unsigned long long rdtsc(void)
{
  unsigned long long int x;
     __asm__ volatile (".byte 0x0f, 0x31" : "=A" (x));
     return x;
}

int interpolationSearch(int sortedArray[], int toFind, int len) {
    // Returns index of toFind in sortedArray, or -1 if not found
    int64_t low = 0;
    int64_t high = len - 1;
    int64_t mid;

    int l = sortedArray[low];
    int h = sortedArray[high];

    while (l <= toFind && h >= toFind) {
        mid = low + (int64_t)((int64_t)(high - low)*(int64_t)(toFind - l))/((int64_t)(h-l));

        int m = sortedArray[mid];

        if (m < toFind) {
            l = sortedArray[low = mid + 1];
        } else if (m > toFind) {
            h = sortedArray[high = mid - 1];
        } else {
            return mid;
        }
    }

    if (sortedArray[low] == toFind)
        return low;
    else
        return -1; // Not found
}

int interpolationSearch2(int sortedArray[], int toFind, int len) {
    // Returns index of toFind in sortedArray, or -1 if not found
    int low = 0;
    int high = len - 1;
    int mid;

    int l = sortedArray[low];
    int h = sortedArray[high];

    while (l <= toFind && h >= toFind) {
        mid = low + ((float)(high - low)*(float)(toFind - l))/(1+(float)(h-l));
        int m = sortedArray[mid];

        if (m < toFind) {
            l = sortedArray[low = mid + 1];
        } else if (m > toFind) {
            h = sortedArray[high = mid - 1];
        } else {
            return mid;
        }
    }

    if (sortedArray[low] == toFind)
        return low;
    else
        return -1; // Not found
}

int binarySearch(int sortedArray[], int toFind, int len) 
{
    // Returns index of toFind in sortedArray, or -1 if not found
    int low = 0;
    int high = len - 1;
    int mid;

    int l = sortedArray[low];
    int h = sortedArray[high];

    while (l <= toFind && h >= toFind) {
        mid = (low + high)/2;

        int m = sortedArray[mid];

        if (m < toFind) {
            l = sortedArray[low = mid + 1];
        } else if (m > toFind) {
            h = sortedArray[high = mid - 1];
        } else {
            return mid;
        }
    }

    if (sortedArray[low] == toFind)
        return low;
    else
        return -1; // Not found
}

int order(const void *p1, const void *p2) { return *(int*)p1-*(int*)p2; }

int main(void) {
    int i = 0, j = 0, size = 100000, trials = 10000;
    int searched[trials];
    srand(-time(0));
    for (j=0; j<trials; j++) { searched[j] = rand()%size; }

    while (size > 10){
        int arr[size];
        for (i=0; i<size; i++) { arr[i] = rand()%size; }
        qsort(arr,size,sizeof(int),order);

        unsigned long long totalcycles_bs = 0;
        unsigned long long totalcycles_is_64 = 0;
        unsigned long long totalcycles_is_float = 0;
        unsigned long long totalcycles_new = 0;
        int res_bs, res_is_64, res_is_float, res_new;
        for (j=0; j<trials; j++) {
            unsigned long long tmp, cycles = rdtsc();
            res_bs = binarySearch(arr,searched[j],size);
            tmp = rdtsc(); totalcycles_bs += tmp - cycles; cycles = tmp;

            res_is_64 = interpolationSearch(arr,searched[j],size);
            assert(res_is_64 == res_bs || arr[res_is_64] == searched[j]); 
            tmp = rdtsc(); totalcycles_is_64 += tmp - cycles; cycles = tmp;

            res_is_float = interpolationSearch2(arr,searched[j],size);
            assert(res_is_float == res_bs || arr[res_is_float] == searched[j]); 
            tmp = rdtsc(); totalcycles_is_float += tmp - cycles; cycles = tmp;
        }
        printf("----------------- size = %10d\n", size);
        printf("binary search          = %10llu\n", totalcycles_bs);
        printf("interpolation uint64_t = %10llu\n",  totalcycles_is_64);
        printf("interpolation float    = %10llu\n",  totalcycles_is_float);
        printf("new                    = %10llu\n",  totalcycles_new);
        printf("\n");
        size >>= 1;
    }
}

解决方案

If you have some control over the in-memory layout of the data, you might want to look at Judy arrays.

Or to put a simpler idea out there: a binary search always cuts the search space in half. An optimal cut point can be found with interpolation (the cut point should NOT be the place where the key is expected to be, but the point which minimizes the statistical expectation of the search space for the next step). This minimizes the number of steps but... not all steps have equal cost. Hierarchical memories allow executing a number of tests in the same time as a single test, if locality can be maintained. Since a binary search's first M steps only touch a maximum of 2**M unique elements, storing these together can yield a much better reduction of search space per-cacheline fetch (not per comparison), which is higher performance in the real world.

n-ary trees work on that basis, and then Judy arrays add a few less important optimizations.

Bottom line: even "Random Access Memory" (RAM) is faster when accessed sequentially than randomly. A search algorithm should use that fact to its advantage.

这篇关于什么是排序后的数组最快的搜索方法?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆