首次和最后出现在C二进制搜索 [英] First and last occurrence for binary search in C

查看:210
本文介绍了首次和最后出现在C二进制搜索的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我想知道我怎么修改它第一个和最后出现的工作二进制搜索,肯定我能在网上找到一些code,但我试图达到深刻的理解,这里是一些基本的非递归二进制搜索,我发现:

  INT二分查找(INT *阵列,诠释number_of_elements,诠释键)
{
    INT低= 0,高= number_of_elements-1,中期;
    而(低< =高)
    {
            中期=(低+高)/ 2;
            如果(阵列[MID]<键)
            {
                    低=中旬+ 1;
            }
            否则,如果(阵列[MID] ==键)
            {
                    返回中旬;
            }
            否则,如果(阵列[MID]>键)
            {
                    高=年年1;
            }    }
    返回-1;
}

修改什么,我需要做的,什么是他们背后的逻辑?


  

编辑:我想它的效率和线性未完成



解决方案

二进制其中包含一个有序集合值,其中值可能会出现不止一次不一定产生的第一个或最后一个元素的数组上搜索。

它产生它所找到的第一个匹配的元素。

由于该元件可以由多个匹配元素所包围,第二步骤是必需的,以便找出第一个和最后一个匹配元素。这可以通过线性搜索进行由其他职位的建议,或者它也可以在对数时间进行。

让我成为第一个找到匹配的指数,由二进制搜索的报道。

于是,平等的序列的开始是[0..i。和对等序列的端部是在[i..N-1]其中,N是序列长度。递归平分的区间,直到边境发现最终得到的第一个和最后一场比赛。

以下(F#)项目显示了几行的想法。它应该是写一个相当于C函数的一件小事。

 让equal_range(A:INT [])I =
    让拍摄第一I0 I1 =
        如果[I0] =一个。[I1] || (I1-I0)LT; 2则
            如果[I0] LT;>一。[I1]
            然后
                I1
            其他
                I0
        其他
            让中期=(I1 - I0)/ 2 + I0
            如果[MID] =一。[I1],然后第一I0其他中旬首次I1中旬
    让REC最后I0 I1 =
        如果一个。[I1] =一[I0] || I1-I0< 2则
            如果[I0] LT;>一。[I1]
            然后
                I0
            其他
                I1
        其他
            让中期=(I1 - I0)/ 2 + I0
            如果[MID] =一[I0],然后最后I1中旬最后还有中旬I0
    (第一0一),(最后我(Array.length A - 1))让test_arrays =
    [
        Array.ofList([1..4] @ [5; 5; 5; 5; 5] @ [6..10])
        [| 1 |]
        [| 1; 1; 1; 1; 1 |]
    ]test_arrays
|> List.iter(乐趣 - >
        printfn%A一
        对于i = 0至Array.length A - 1做
            printfn%D(A [%d个=%d个):%AI I(A []​​)(equal_range A I)
    )

下面相当于非递归C- code:

 的#include<&stdio.h中GT;
#包括LT&;&ASSERT.H GT;
#包括LT&;&stdbool.h GT;typedef结构IndexPair_tag
{
    size_t型一;
    为size_t B:
} IndexPair_t;布尔equal_range(const int的*一,为size_t N,为size_t我,IndexPair_t *结果)
{
    如果(NULL ==一)返回false;
    如果(NULL ==结果)返回false;
    如果(I> = N)返回false;    为size_t I0,I1,中旬;    I0 = 0;
    I1 = I;
    而(!一个[I0] =一个[I1]放大器;及((I1 - I 0)→1))
    {
        中期=(I1 - I 0)/ 2 + I0;
        如果(A [MID] == A [I1])
        {
            I1 =中旬;
        }
        其他
        {
            I0 =中旬;
        }
    }
    如果(A [I0]!= A [I1])
        result->一种= I1;
    其他
        result->一种= I0;    I0 = I;
    I1 = N - 1;
    而(!一个[I0] =一个[I1]放大器;及((I1 - I 0)→1))
    {
        中期=(I1 - I 0)/ 2 + I0;
        如果(A [MID] == A [I0])
        {
            I0 =中旬;
        }
        其他
        {
            I1 =中旬;
        }
    }
    如果(A [I0]!= A [I1])
        result-> B = I0;
    其他
        result-> B = I1;    返回true;
}静态无效ShowArray为(int *一,为size_t N)
{
    如果(N 0)
    {
        的printf([%D中,[0]);
        用于(为size_t I = 1; I< N;我++)
        {
            的printf(%d个,一个[我]);
        }
        的printf(] \\ n);
    }
    其他
        的printf([]的\\ n);}诠释的main()
{
    {
        常量为size_t N = 14;
        诠释一个[N] = {} 1,2,3,4,5,5,5,5,5,6,7,8,9,10;
        ShowArray(A,N);
        IndexPair_t结果;
        用于(为size_t我= 0; I< N;我++)
        {
            如果(equal_range(A,14,I,放大器;结果))
            {
                的printf(%d个(A [%d个=%d):(%D,%D)\\ n,我,我,一个[I],result.a,result.b);
                断言(A [result.a] == A [result.b]);
            }
            其他
            {
                的printf(对于i =%d个,equal_range()返回false \\ n,I);
                断言(假);
            }
        }
    }
    {
        常量为size_t N = 1;
        诠释一个[N] = {1};
        ShowArray(A,N);
        IndexPair_t结果;
        用于(为size_t我= 0; I< N;我++)
        {
            如果(equal_range(A,N,I,放大器;结果))
            {
                的printf(%d个(A [%d个=%d):(%D,%D)\\ n,我,我,一个[I],result.a,result.b);
                断言(A [result.a] == A [result.b]);
            }
            其他
            {
                的printf(对于i =%d个,equal_range()返回false \\ n,I);
                断言(假);
            }
        }
    }
    {
        常量为size_t N = 5;
        诠释一个[N] = {1,1,1,1,1};
        ShowArray(A,N);
        IndexPair_t结果;
        用于(为size_t我= 0; I< N;我++)
        {
            如果(equal_range(A,N,I,放大器;结果))
            {
                的printf(%d个(A [%d个=%d):(%D,%D)\\ n,我,我,一个[I],result.a,result.b);
                断言(A [result.a] == A [result.b]);
            }
            其他
            {
                的printf(对于i =%d个,equal_range()返回false \\ n,I);
                断言(假);
            }
        }
    }    返回0;
}

更新:乔纳森是正确的,该功能的设计是草率和有一些角落情况下的问题。


  • 修正了一个事实,即功能不能报告的说法错误。

  • 增加防御性参数测试equal_range()。

  • 修正了一个事实,即边缘的情况下,错误的结果产生的。

  • 改变测试车手(主),因此所有的边缘情况都包括在内。

的事实,该函数采用一个索引,而不是一个值是好的,IMHO,因为它被认为是第二个步骤,其产生的元素的索引搜索用于第一步骤之后

I'm trying to understand how do I modify the binary search for it work for first and last occurrences, surely I can find some code on the web but I'm trying to reach deep understanding, here is some basic non-recursive binary search I found:

int BinarySearch(int *array, int number_of_elements, int key)
{
    int low = 0, high = number_of_elements-1, mid;
    while(low <= high)
    {
            mid = (low + high)/2;
            if(array[mid] < key)
            {
                    low = mid + 1; 
            }
            else if(array[mid] == key)
            {
                    return mid;
            }
            else if(array[mid] > key)
            {
                    high = mid-1;
            }

    }
    return -1;
}

What modifications do I need to do and what are the logic behind them?

Edit: I would like for it to be efficient and not done linearly.

解决方案

Binary search on an array which contains a sorted set of values, where values may occur more than once does not necessarily yield the first or last element.

It yields the first matching element it finds.

Since this element could be surrounded by by more matching elements, a second step is required, in order to find the first and last matching element. This can be done with linear search as suggested by other posts or it can also be done in logarithmic time.

Let i be the index of the first found match, as reported by binary search.

Then, the start of the "sequence of equals" is in [0..i]. And the end of the "sequence of equals" is in [i..N-1] where N is the length of the sequence. Recursively bisecting those intervals until the border is found eventually yields the first and last match.

The following (f#) program shows the idea in a few lines. It should be a trivial matter to write an equivalent C-function.

let equal_range (a : int[]) i =
    let rec first i0 i1 = 
        if a.[i0] = a.[i1] || (i1-i0) < 2 then
            if a.[i0] <> a.[i1] 
            then
                i1
            else
                i0
        else
            let mid = (i1 - i0) / 2 + i0
            if a.[mid] = a.[i1] then first i0 mid else first mid i1
    let rec last i0 i1 = 
        if a.[i1] = a.[i0] || i1-i0 < 2 then 
            if a.[i0] <> a.[i1] 
            then
                i0
            else
                i1
        else
            let mid = (i1 - i0) / 2 + i0
            if a.[mid] = a.[i0] then last mid i1 else last i0 mid
    (first 0 i),(last i (Array.length a - 1))

let test_arrays = 
    [
        Array.ofList ([1..4] @ [5;5;5;5;5] @ [6..10])
        [|1|]
        [|1;1;1;1;1|]
    ]

test_arrays
|> List.iter(fun a -> 
        printfn "%A" a 
        for i = 0 to Array.length a - 1 do
            printfn "%d(a.[%d] = %d): %A" i i (a.[i]) (equal_range a i)
    )

Here the equivalent, non-recursive C- code:

#include <stdio.h>
#include <assert.h>
#include <stdbool.h>

typedef struct IndexPair_tag
{
    size_t a;
    size_t b;
} IndexPair_t;

bool equal_range(const int * a, size_t n, size_t i, IndexPair_t * result)
{
    if (NULL == a) return false;
    if (NULL == result) return false;
    if (i >= n) return false;

    size_t i0, i1, mid;

    i0 = 0;
    i1 = i;
    while (a[i0] != a[i1] && ((i1 - i0) > 1))
    {
        mid = (i1 - i0) / 2 + i0;
        if (a[mid] == a[i1])
        {
            i1 = mid;
        }
        else
        {
            i0 = mid;
        }
    }
    if (a[i0] != a[i1])
        result->a = i1;
    else
        result->a = i0;

    i0 = i;
    i1 = n - 1;
    while (a[i0] != a[i1] && ((i1 - i0) > 1))
    {
        mid = (i1 - i0) / 2 + i0;
        if (a[mid] == a[i0])
        {
            i0 = mid;
        }
        else
        {
            i1 = mid;
        }
    }
    if (a[i0] != a[i1] )
        result->b = i0;
    else
        result->b = i1;

    return true;
}

static void ShowArray(int *a, size_t N)
{
    if (N > 0)
    {
        printf("[%d", a[0]);
        for (size_t i = 1; i < N; i++)
        {
            printf(", %d", a[i]);
        }
        printf("]\n");
    }
    else
        printf("[]\n");

}

int main()
{
    {
        const size_t N = 14;
        int a[N] = { 1,2,3,4,5,5,5,5,5,6,7,8,9,10 };
        ShowArray(a, N);
        IndexPair_t result;
        for (size_t i = 0; i < N; i++)
        {
            if (equal_range(a, 14, i, &result))
            {
                printf("%d(a[%d] = %d): (%d,%d)\n", i, i, a[i], result.a, result.b);
                assert(a[result.a] == a[result.b]);
            }
            else
            {
                printf("For i = %d, equal_range() returned false.\n", i);
                assert(false);
            }
        }
    }
    {
        const size_t N = 1;
        int a[N] = { 1 };
        ShowArray(a, N);
        IndexPair_t result;
        for (size_t i = 0; i < N; i++)
        {
            if (equal_range(a, N, i, &result))
            {
                printf("%d(a[%d] = %d): (%d,%d)\n", i, i, a[i], result.a, result.b);
                assert(a[result.a] == a[result.b]);
            }
            else
            {
                printf("For i = %d, equal_range() returned false.\n", i);
                assert(false);
            }
        }
    }
    {
        const size_t N = 5;
        int a[N] = { 1,1,1,1,1 };
        ShowArray(a, N);
        IndexPair_t result;
        for (size_t i = 0; i < N; i++)
        {
            if (equal_range(a, N, i, &result))
            {
                printf("%d(a[%d] = %d): (%d,%d)\n", i, i, a[i], result.a, result.b);
                assert(a[result.a] == a[result.b]);
            }
            else
            {
                printf("For i = %d, equal_range() returned false.\n", i);
                assert(false);
            }
        }
    }

    return 0;
}

Update: Jonathan was right, the design of the function was sloppy and had some corner case issues.

  • Fixed the fact that the function cannot report argument errors.
  • Added defensive argument tests to equal_range().
  • Fixed the fact, that for edge cases, wrong results were produced.
  • Changed test driver (main) so all edge cases are covered.

The fact, that the function takes an index, not a value is okay, IMHO, as it is supposed to be the second step, after a first step which produces the index of the element looked for.

这篇关于首次和最后出现在C二进制搜索的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆