如何加快链表中的合并排序? [英] How to accelerate merge sort in linked list?

查看:33
本文介绍了如何加快链表中的合并排序?的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我已经有一个迭代版本合并排序算法来对链表进行排序.

I already have an iterative version merge sort algorithm to sort linked list.

以下版本在通常情况下效果很好.

Following version work well in general case.

大致描述我的算法:

我使用按钮,首先进行迭代,将2个节点和2个节点合并为4个已排序的节点,依此类推...

I use button up, first iterate I merge 2 nodes and 2 nodes into 4 nodes which is sorted, and so on...

typedef struct ELE {
    char *value;
    struct ELE *next;
} list_ele_t;

/* Queue structure */
typedef struct {
    list_ele_t *head; /* Linked list of elements */
    list_ele_t *tail; /* Linked list of elements */
    int size;
} queue_t;

void q_sort(queue_t *q)
{
    if (!q || !q->head || q->size == 1)
        return;

    int block_size = 1, n = q->size, i, alen, blen;
    list_ele_t virtual_head;
    list_ele_t *last = NULL, *it = NULL, *l1 = NULL, *l2 = NULL, *tmp = NULL;
    // list_ele_t *l1head = NULL, *l1tail = NULL, *l2tail = NULL;
    virtual_head.next = q->head;
    while (block_size < n) {
        int iter = 0;
        last = &virtual_head;
        it = virtual_head.next;
        while (iter < n) {
            // decide each iterate time's block size a and b
            alen = min(n - iter, block_size);
            // avoid odd block
            blen = min(n - iter - alen, block_size);

            l1 = it;
            l1head = l1;
            // if left block is odd, just skip
            if (blen != 0) {
                // seperate one list in to l1 and l2
                for (i = 0; i < alen - 1; ++i)
                    it = it->next;
                l1tail = it;
                l2 = it->next;
                it->next = NULL;
                it = l2;
                for (i = 0; i < blen - 1; ++i)
                    it = it->next;
                l2tail = it;
                tmp = it->next;
                it->next = NULL;
                it = tmp;
            }


            while (l1 || l2) {
                if (l2 == NULL ||
                    (l1 != NULL && strcmp(l1->value, l2->value) < 0)) {
                    // if l2 doesn't contain any node, just append l1 to
                    // merge list or if value of l1 is smaller
                    last->next = l1;
                    last = last->next;
                    l1 = l1->next;
                } else {
                    // if l1 doesn't contain any node, just append l2 to
                    // merge list or if value of l2 is smaller
                    last->next = l2;
                    last = last->next;
                    l2 = l2->next;
                }
            }
            last->next = NULL;
            iter += alen + blen;
        }
        block_size <<= 1;
    }

    q->head = virtual_head.next;
    list_ele_t *nd = q->head;
    while (nd->next) {
        nd = nd->next;
    }
    q->tail = nd;
} 

当该算法遇到很长的链表时会很慢,但是这种测试台具有一些特定的格式,例如具有多个重复部分.例如AAAAABBBBBBB

When this algorithm encounter very long linked list will be very slow, but this kind of testbench have some specific format like have several repetitive section. e.g. AAAAABBBBBBB

结果,我在原始版本中添加了一些代码.我检查列表1( l1 )和列表2( l2 )的成员是否相同.如果是这样,我跳过合并步骤,只需将 l2 附加到 l1

As a result, I add some code into origin version. I check whether the member of list 1 (l1) and list 2 (l2) are same. If so, I skip merge step, just append l2 to l1

以下是新版本:

void q_sort(queue_t *q)
{
    long long cnt = 0;
    if (!q || !q->head || q->size == 1)
        return;
    int block_size = 1, n = q->size, i, alen, blen;
    list_ele_t virtual_head;
    list_ele_t *last = NULL, *it = NULL, *l1 = NULL, *l2 = NULL, *tmp = NULL;
    list_ele_t *l1head = NULL, *l1tail = NULL, *l2tail = NULL;
    virtual_head.next = q->head;
    while (block_size < n) {
        int iter = 0;
        last = &virtual_head;
        it = virtual_head.next;
        while (iter < n) {
            // decide each iterate time's block size a and b
            alen = min(n - iter, block_size);
            // avoid odd block
            blen = min(n - iter - alen, block_size);
            // printf("%d %d block size: %d\n",alen,blen,block_size);

            l1 = it;
            l1head = l1;
            // if left block is odd, just skip
            if (blen != 0) {
                // seperate one list in to l1 and l2
                for (i = 0; i < alen - 1; ++i)
                    it = it->next;
                l1tail = it;
                l2 = it->next;
                it->next = NULL;
                it = l2;
                for (i = 0; i < blen - 1; ++i)
                    it = it->next;
                l2tail = it;
                tmp = it->next;
                it->next = NULL;
                it = tmp;
            }

            if (l1head && l2tail && strcmp(l1head->value, l2tail->value) == 0
                && (l1tail && l2 && strcmp(l1tail->value,l2->value) == 0)) {
                iter += alen + blen;
                last->next = l1;
                l1tail->next = l2;
                last = l2tail;
                l2tail->next = NULL;
            } else {
                while (l1 || l2) {
                    if (l2 == NULL ||
                        (l1 != NULL && strcmp(l1->value, l2->value) < 0)) {
                        // if l2 doesn't contain any node, just append l1 to
                        // merge list or if value of l1 is smaller
                        last->next = l1;
                        last = last->next;
                        l1 = l1->next;
                    } else {
                        // if l1 doesn't contain any node, just append l2 to
                        // merge list or if value of l2 is smaller
                        last->next = l2;
                        last = last->next;
                        l2 = l2->next;
                    }
                }
                last->next = NULL;
                iter += alen + blen;
            }
        }
        block_size <<= 1;
    }

    q->head = virtual_head.next;
    list_ele_t *nd = q->head;
    while (nd->next) {
        nd = nd->next;
    }
    q->tail = nd;
}

概念很简单,但是我的加法似乎是错误的.错误消息告诉我也许存在无限循环.

Concept is simple but my addition seems wrong. Error message told me maybe there exist infinite loop.

首先,我想知道添加此部分时是否犯了任何错误?该如何解决?

First, I wonder are there any mistake I make when I add this section? how to fix it?

if (l1head && l2tail && strcmp(l1head->value, l2tail->value) == 0
            && (l1tail && l2 && strcmp(l1tail->value,l2->value) == 0)) {
            iter += alen + blen;
            last->next = l1;
            l1tail->next = l2;
            last = l2tail;
            l2tail->next = NULL;
        }

第二,是否有更好的方法来加速此特定格式的链表?(例如 C-> C-> C-> C-> B-> B ..... B )

Second, are there better way to accelerate this specific format linked list? ( e.g. C->C->C->C->B->B.....B)

谢谢大家的建议!

推荐答案

使用小(32)数组列表"的示例代码,其中array [i]是具有2 ^ i个节点的列表(最后一个条目是大小不限).这可能是使用列表对列表进行排序的最快方法.通常,将列表复制到数组中,对数组进行排序并从数组中创建新的排序后的列表,通常会更快.输入参数是指向列表的第一个节点的指针,返回值是指向排序后的列表的第一个节点的指针.对列表进行排序后,调用代码将需要扫描排序后的列表,以在列表结构中设置尾指针.合并中的比较使用"src2< src1"来遵循C ++模板排序中使用的约定.

Example code that uses a small (32) array of "lists", where array[i] is a list with 2^i nodes (except last entry is unlimited size). This is probably the fastest way to sort a list using a list. It's usually faster to copy the list to an array, sort the array, and create a new sorted list from the array. The input parameter is a pointer to the first node of a list, and the returned value is a pointer to the first node of the sorted list. After sorting a list, the calling code will need to scan the sorted list to set the tail pointer in the list structure. The comparison in merge uses "src2 < src1" to follow the conventions used in C++ template sorts.

typedef struct NODE_{
struct NODE_ * next;
uint64_t data;
}NODE;

NODE * MergeLists(NODE *, NODE *);      /* prototype */

#define NUMLISTS 32                     /* number of lists */

NODE * SortList(NODE *pList)
{
NODE * aList[NUMLISTS];                 /* array of lists */
NODE * pNode;
NODE * pNext;
size_t i;
    if(pList == NULL)                   /* check for empty list */
        return NULL;
    for(i = 0; i < NUMLISTS; i++)       /* zero array */
        aList[i] = NULL;
    pNode = pList;                      /* merge nodes into array */
    while(pNode != NULL){
        pNext = pNode->next;
        pNode->next = NULL;
        for(i = 0; (i < NUMLISTS) && (aList[i] != NULL); i++){
            pNode = MergeLists(aList[i], pNode);
            aList[i] = NULL;
        }
        if(i == NUMLISTS)
            i--;
        aList[i] = pNode;
        pNode = pNext;
    }
    pNode = NULL;                       /* merge array into one list */
    for(i = 0; i < NUMLISTS; i++)
        pNode = MergeLists(aList[i], pNode);
    return pNode;
}

NODE * MergeLists(NODE *pSrc1, NODE *pSrc2)
{
NODE *pDst = NULL;                      /* destination head ptr */
NODE **ppDst = &pDst;                   /* ptr to head or prev->next */
    if(pSrc1 == NULL)
        return pSrc2;
    if(pSrc2 == NULL)
        return pSrc1;
    while(1){
        if(pSrc2->data < pSrc1->data){  /* if src2 < src1 */
            *ppDst = pSrc2;
            pSrc2 = *(ppDst = &pSrc2->next);
            if(pSrc2 == NULL){
                *ppDst = pSrc1;
                break;
            }
        } else {                        /* src1 <= src2 */
            *ppDst = pSrc1;
            pSrc1 = *(ppDst = &pSrc1->next);
            if(pSrc1 == NULL){
                *ppDst = pSrc2;
                break;
            }
        }
    }
    return pDst;
}

这篇关于如何加快链表中的合并排序?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆