重复删除 [英] Duplicate removal

查看：175 发布时间：2017/7/20 23:37:51 arrays algorithm duplicates

本文介绍了重复删除的处理方法，对大家解决问题具有一定的参考价值，需要的朋友们下面随着小编来一起学习吧！

问题描述

完整的问题：

在O（n）时间复杂度中使用C ++ / Java在一维数组中实现重复删除算法，没有额外的空间。例如，如果输入数组是{3,5,5,3,7,8,5,8,9,9}，则输出应为{3,5,7,8,9}。

我已经想了很久了，还没有解决它。

我的想法：

如果数组被排序，我可以删除O（n）中的重复项。但是我知道的最快的排序算法具有O（n * log（n））复杂度。

在O（n）中排序的一种算法是bin或bucket分类。这个问题在于它不能在没有使用额外空间的情况下实现。

我想知道是否可能。

我已经研究了一下，没有发现任何新内容。

感谢任何帮助。 / p>

PS：如果明天不参加考试，我会给予更多的时间。

解决方案

确实有可能，只需使用就地基数排序

运行 O（kn）其中 k 是任何标准数字数据类型的常量，并且需要 O（1）额外的空间。

 
 
 这里是代码：
  #include< stdio.h> 
 #include< stdlib.h> 
 #include< stdint.h> 
 
 /// in-place 32位递归基数排序
 void I32R（int32_t * data，uint32_t size，uint32_t nbit）{
 uint32_t dbgn =（uint32_t）-1 ，dend = size; 
 
 while（++ dbgn< dend）
 if（data [dbgn]& nbit）
 while（dbgn< --dend）
 if 〜data [dend]& nbit）{
 data [dbgn] ^ = data [dend]; 
 data [dend] ^ = data [dbgn]; 
 data [dbgn] ^ = data [dend]; 
 break; 
} 
 if（（nbit>> = 1）&&（dend> 1））
 I32R（data，dend，nbit）; 
 if（nbit&&（size  -  dend> 1））
 I32R（data + dend，size-dend，nbit）; 
} 
 
 /// O_t（n）/ O_s（1）重复删除
 int32_t dups（int32_t * data，uint32_t size）{
 int32_t iter，* uniq = data; 
 
 if（size< 2）
返回大小; 
 for（iter = 0; iter< size; iter ++）
 data [iter] ^ =（1< 31）; 
 I32R（data，size，1< 31）; 
 data [0] ^ =（1 <<31）; 
如果（* uniq！=（data [iter] ^ =（1< 31）））
 * + + uniq = data [iter]; 
 return uniq  -  data + 1; 
} 
 
 void parr（int32_t * data，uint32_t size）{
 for（; size; size--）
 printf（％4d％s * data ++，（size == 1）？\\\
\\\
：，）; 
} 
 
 int main（）{
 int32_t iter，size，* data; 
 
 data = malloc（（size = 256）* sizeof（* data））; 
 for（iter = 0; iter< size; iter ++）
 data [iter] =（int8_t）rand（）& -3; 
 parr（data，size）; 
 parr（data，dups（data，size））; 
（数据）; 
 return 0; 
} 
  
  NB＃1：正数数字变得大于负数时，排序是必要的，因为基数排序只对无符号值进行操作。
 
 
   NB＃2：这只是
 
 
   NB＃3：哇，这实际上比 qsort ）！
 
Let's be honest, this is a hw question.

The question in its entirety:

  Implement duplicate removal algorithm in a one-dimensional array using C++/Java in O(n) time complexity with no extra space. For example, if the input array is {3,5,5,3,7,8,5,8,9,9} then the output should be {3,5,7,8,9}.
I have thought about it for quite a while and haven't been able to solve it yet.

My thoughts:

I could remove duplicates in O(n) if the array was sorted. But the fastest sorting algorithm I know has O(n*log(n)) complexity.
One algorithm that sorts in O(n) is bin or bucket sort. The problem here is that it cannot be implemented without using extra space.
I wonder if it is possible at all.
I have researched a bit and have found nothing new.

Thanks for any help.

P.S.: I would have given it more time if it weren't for the exam tomorrow.
 解决方案

This is indeed possible, just use in-place radix sort.

It runs for O(kn) where k is constant for any standard numeric data type, and requires O(1) extra space.

Here`s the code:
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

/// in-place 32-bit recursive radix sort
void I32R(int32_t *data, uint32_t size, uint32_t nbit) {
    uint32_t dbgn = (uint32_t)-1, dend = size;

    while (++dbgn < dend)
        if (data[dbgn] & nbit)
            while (dbgn < --dend)
                if (~data[dend] & nbit) {
                    data[dbgn] ^= data[dend];
                    data[dend] ^= data[dbgn];
                    data[dbgn] ^= data[dend];
                    break;
                }
    if ((nbit >>= 1) && (dend > 1))
        I32R(data, dend, nbit);
    if (nbit && (size - dend > 1))
        I32R(data + dend, size - dend, nbit);
}

/// O_t(n) / O_s(1) duplicate remover
int32_t dups(int32_t *data, uint32_t size) {
    int32_t iter, *uniq = data;

    if (size < 2)
        return size;
    for (iter = 0; iter < size; iter++)
        data[iter] ^= (1 << 31);
    I32R(data, size, 1 << 31);
    data[0] ^= (1 << 31);
    for (iter = 1; iter < size; iter++)
        if (*uniq != (data[iter] ^= (1 << 31)))
            *++uniq = data[iter];
    return uniq - data + 1;
}

void parr(int32_t *data, uint32_t size) {
    for (; size; size--)
        printf("%4d%s", *data++, (size == 1)? "\n\n" : ", ");
}

int main() {
    int32_t iter, size, *data;

    data = malloc((size = 256) * sizeof(*data));
    for (iter = 0; iter < size; iter++)
        data[iter] = (int8_t)rand() & -3;
    parr(data, size);
    parr(data, dups(data, size));
    free(data);
    return 0;
}
N.B.#1: inverting the sign bit before sorting is necessary for positive numbers to become greater than negatives, as radix sort only operates on unsigned values.

N.B.#2: this is just a rough illustration, never really tested.

N.B.#3: oh wow, this is actually faster than qsort()!

                        这篇关于重复删除的文章就介绍到这了，希望我们推荐的答案对大家有所帮助，也希望大家多多支持IT屋！


                    
                        查看全文

重复删除 [英] Duplicate removal

问题描述

相关文章

其他开发最新文章

热门教程

热门工具

登录关闭

重复删除 [英] Duplicate removal

问题描述

相关文章

其他开发最新文章

热门教程

热门工具

登录 关闭

登录关闭