删除C中的数组中的重复 [英] Removing Duplicates in an array in C

查看:122
本文介绍了删除C中的数组中的重复的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

问题有点复杂。这里的问题是摆脱重复,并将数组的唯一元素以原始序列保存到另一个数组中。



例如:



如果输入输入了bacadt



结果应该是:输入输入的确切状态的bacdt。



所以,为了排序数组,检查无法正常工作,因为我丢失了原始序列。我被建议使用一系列的索引,但我不知道该怎么办。那么你的建议是什么?






对于愿意回答这个问题的人,我想添加一些具体的信息。

  char ** finduni(char * words [100],int limit)
{
//
//这里的方法
//
}

是我的功能重复的数组应该被删除并存储在不同的数组中是字[100]。所以,这个过程将会进行。我首先考虑将单词的所有元素变成另一个数组,然后对该数组进行排序,但是在进行了一些测试之后就不行了。

解决方案

嗯,这里是一个 char 类型。注意它不缩放。

  #includestdio.h
#includestring.h

void removeDuplicates(unsigned char * string)
{
unsigned char allCharacters [256] = {0};
int lookAt;
int writeTo = 0;
(lookAt = 0; lookAt< strlen(string); lookAt ++)
{
if(allCharacters [string [lookAt]] == 0)
{
allCharacters [string [lookAt]] = 1; //标记看到
string [writeTo ++] = string [lookAt]; // copy it
}
}
string [writeTo] ='\0';
}

int main()
{
char word [] =abbbcdefbbbghasdddaiouasdf;
removeDuplicates(word);
printf(Word now now [%s] \\\
,word);
return 0;
}

以下是输出:

  Word现在是[abcdefghsiou] 

是像你想要的东西如果您使用 int float double char * 作为类型,此方法根本不会缩放。



编辑



我发布了,然后看到你的澄清,它是一个数组 char * / code>。我会更新方法。






我希望这不是太多的代码。我修改了此QuickSort算法,并基本添加了索引内存。算法是O(n log n),因为下面的3个步骤是加法的,这是2个最差的复杂度。


  1. 排序字符串数组,但每个交换也应该反映在索引数组中。在这个阶段之后, originalIndices 的第i个元素保存排序数组的第i个元素的原始索引。

  2. 将排序数组中的重复元素设置为 NULL ,并将索引值设置为元素,这是最高可以是。

  3. 对原始索引的数组进行排序,并确保每个交换都反映在字符串数组中。这给了我们原始的字符串数组,除了重复的结尾,它们都是 NULL

  4. ,我返回新的元素数。

代码:



<$ p $
#includestdlib.h

void sortArrayAndSetCriteria(char * * arr,int元素,int * originalIndices)
{
#define MAX_LEVELS 1000
char * piv;
int恳求[MAX_LEVELS],结束[MAX_LEVELS],i = 0,L,R;
int idx,cidx; $ id $ id $ id = idx;
beg [0] = 0;
end [0] = elements;
while(i> = 0)
{
L = beg [i];
R = end [i] - 1;
if(L< R)
{
piv = arr [L];
cidx = originalIndices [L];
if(i == MAX_LEVELS-1)
return;
while(L< R)
{
while(strcmp(arr [R],piv))= 0&& L< R)
if(L< R)
{
arr [L] = arr [R];
originalIndices [L ++] = originalIndices [R];
}
while(strcmp(arr [L],piv))< = 0&& L< R)L ++;
if(L< R)
{
arr [R] = arr [L];
originalIndices [R--] = originalIndices [L];
}
}
arr [L] = piv;
originalIndices [L] = cidx;
beg [i + 1] = L + 1;
end [i + 1] = end [i];
end [i ++] = L;
}
else
{
i--;
}
}
}

int removeDuplicatesFromBoth(char ** arr,int elements,int * originalIndices)
{
//现在删除重复项
int i = 1,newLimit = 1;
char * curr = arr [0];
while(i< elements)
{
if(strcmp(curr,arr [i])== 0)
{
arr [i] = NULL ; //如果是malloc'd
originalIndices [i] = elements; //将它放在结尾
}
else
{
curr = arr [i];
newLimit ++;
}
i ++;
}
return newLimit;
}

void sortArrayBasedOnCriteria(char ** arr,int elements,int * originalIndices)
{
#define MAX_LEVELS 1000
int piv;
int恳求[MAX_LEVELS],结束[MAX_LEVELS],i = 0,L,R;
int idx;
char * cidx;
beg [0] = 0;
end [0] = elements;
while(i> = 0)
{
L = beg [i];
R = end [i] - 1;
if(L< R)
{
piv = originalIndices [L];
cidx = arr [L];
if(i == MAX_LEVELS-1)
return;
while(L< R)
{
while(originalIndices R =&&& L< R)
if(L< R)
{
arr [L] = arr [R];
originalIndices [L ++] = originalIndices [R];
}
while(originalIndices [L]< = piv&& L< R)L ++;
if(L< R)
{
arr [R] = arr [L];
originalIndices [R--] = originalIndices [L];
}
}
arr [L] = cidx;
originalIndices [L] = piv;
beg [i + 1] = L + 1;
end [i + 1] = end [i];
end [i ++] = L;
}
else
{
i--;
}
}
}

int removeDuplicateStrings(char * words [],int limit)
{
int * indices =(int *)malloc(limit * sizeof(int));
int newLimit;
sortArrayAndSetCriteria(words,limit,indices);
newLimit = removeDuplicatesFromBoth(words,limit,indices);
sortArrayBasedOnCriteria(words,limit,indices);
free(indices);
return newLimit;
}

int main()
{
char * words [] = {abc,def,bad,hello队长,def,abc,再见};
int newLimit = removeDuplicateStrings(words,8);
int i = 0; (i = 0; i< newLimit; i ++)printf(Word @%d =%s\\\
,i,词[i]);

return 0;
}


The question is a little complex. The problem here is to get rid of duplicates and save the unique elements of array into another array with their original sequence.

For example :

If the input is entered b a c a d t

The result should be : b a c d t in the exact state that the input entered.

So, for sorting the array then checking couldn't work since I lost the original sequence. I was advised to use array of indices but I don't know how to do. So what is your advise to do that?


For those who are willing to answer the question I wanted to add some specific information.

char** finduni(char *words[100],int limit)
{
//
//Methods here
//
}

is the my function. The array whose duplicates should be removed and stored in a different array is words[100]. So, the process will be done on this. I firstly thought about getting all the elements of words into another array and sort that array but that doesn't work after some tests. Just a reminder for solvers :).

解决方案

Well, here is a version for char types. Note it doesn't scale.

#include "stdio.h"
#include "string.h"

void removeDuplicates(unsigned char *string)
{
   unsigned char allCharacters [256] = { 0 };
   int lookAt;
   int writeTo = 0;
   for(lookAt = 0; lookAt < strlen(string); lookAt++)
   {
      if(allCharacters[ string[lookAt] ] == 0)
      {
         allCharacters[ string[lookAt] ] = 1;  // mark it seen
         string[writeTo++] = string[lookAt];     // copy it
      }
   }
   string[writeTo] = '\0';
}

int main()
{
   char word[] = "abbbcdefbbbghasdddaiouasdf";
   removeDuplicates(word);
   printf("Word is now [%s]\n", word);
   return 0;
}

The following is the output:

Word is now [abcdefghsiou]

Is that something like what you want? You can modify the method if there are spaces between the letters, but if you use int, float, double or char * as the types, this method won't scale at all.

EDIT

I posted and then saw your clarification, where it's an array of char *. I'll update the method.


I hope this isn't too much code. I adapted this QuickSort algorithm and basically added index memory to it. The algorithm is O(n log n), as the 3 steps below are additive and that is the worst case complexity of 2 of them.

  1. Sort the array of strings, but every swap should be reflected in the index array as well. After this stage, the i'th element of originalIndices holds the original index of the i'th element of the sorted array.
  2. Remove duplicate elements in the sorted array by setting them to NULL, and setting the index value to elements, which is the highest any can be.
  3. Sort the array of original indices, and make sure every swap is reflected in the array of strings. This gives us back the original array of strings, except the duplicates are at the end and they are all NULL.
  4. For good measure, I return the new count of elements.

Code:

#include "stdio.h"
#include "string.h"
#include "stdlib.h"

void sortArrayAndSetCriteria(char **arr, int elements, int *originalIndices)
{
   #define  MAX_LEVELS  1000
   char *piv;
   int  beg[MAX_LEVELS], end[MAX_LEVELS], i=0, L, R;
   int idx, cidx;
   for(idx = 0; idx < elements; idx++)
      originalIndices[idx] = idx;
   beg[0] = 0;
   end[0] = elements;
   while (i>=0)
   {
      L = beg[i];
      R = end[i] - 1;
      if (L<R)
      {
         piv = arr[L];
         cidx = originalIndices[L];
         if (i==MAX_LEVELS-1)
            return;
         while (L < R)
         {
            while (strcmp(arr[R], piv) >= 0 && L < R) R--;
            if (L < R)
            {
               arr[L] = arr[R];
               originalIndices[L++] = originalIndices[R];
            }
            while (strcmp(arr[L], piv) <= 0 && L < R) L++;
            if (L < R)
            {
               arr[R] = arr[L];
               originalIndices[R--] = originalIndices[L];
            }
         }
         arr[L] = piv;
         originalIndices[L] = cidx;
         beg[i + 1] = L + 1;
         end[i + 1] = end[i];
         end[i++] = L;
      }
      else
      {
         i--;
      }
   }
}

int removeDuplicatesFromBoth(char **arr, int elements, int *originalIndices)
{
   // now remove duplicates
   int i = 1, newLimit = 1;
   char *curr = arr[0];
   while (i < elements)
   {
      if(strcmp(curr, arr[i]) == 0)
      {
         arr[i] = NULL;   // free this if it was malloc'd
         originalIndices[i] = elements;  // place it at the end
      }
      else
      {
         curr = arr[i];
         newLimit++;
      }
      i++;
   }
   return newLimit;
}

void sortArrayBasedOnCriteria(char **arr, int elements, int *originalIndices)
{
   #define  MAX_LEVELS  1000
   int piv;
   int beg[MAX_LEVELS], end[MAX_LEVELS], i=0, L, R;
   int idx;
   char *cidx;
   beg[0] = 0;
   end[0] = elements;
   while (i>=0)
   {
      L = beg[i];
      R = end[i] - 1;
      if (L<R)
      {
         piv = originalIndices[L];
         cidx = arr[L];
         if (i==MAX_LEVELS-1)
            return;
         while (L < R)
         {
            while (originalIndices[R] >= piv && L < R) R--;
            if (L < R)
            {
               arr[L] = arr[R];
               originalIndices[L++] = originalIndices[R];
            }
            while (originalIndices[L] <= piv && L < R) L++;
            if (L < R)
            {
               arr[R] = arr[L];
               originalIndices[R--] = originalIndices[L];
            }
         }
         arr[L] = cidx;
         originalIndices[L] = piv;
         beg[i + 1] = L + 1;
         end[i + 1] = end[i];
         end[i++] = L;
      }
      else
      {
         i--;
      }
   }
}

int removeDuplicateStrings(char *words[], int limit)
{
   int *indices = (int *)malloc(limit * sizeof(int));
   int newLimit;
   sortArrayAndSetCriteria(words, limit, indices);
   newLimit = removeDuplicatesFromBoth(words, limit, indices);
   sortArrayBasedOnCriteria(words, limit, indices);
   free(indices);
   return newLimit;
}

int main()
{
   char *words[] = { "abc", "def", "bad", "hello", "captain", "def", "abc", "goodbye" };
   int newLimit = removeDuplicateStrings(words, 8);
   int i = 0;
   for(i = 0; i < newLimit; i++) printf(" Word @ %d = %s\n", i, words[i]);
   return 0;
}

这篇关于删除C中的数组中的重复的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆