如何将字符串分隔成独特的字符/字符串数组 [英] How to separate a string into an array of the unique characters/ strings

查看:124
本文介绍了如何将字符串分隔成独特的字符/字符串数组的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

基本上,我想知道是否可以(如果可以,该怎么做)从左到右读取一个字符串,并在每次发现一个新的子串时贪婪地终止当前子串并将其追加到结果中。例如:

"ABCABCABCABC" 会得到 {"A" "B" "C" "AB" "CA" "BC" "ABC"}

我试了一整天,最后得到的只有有问题的代码和崩溃的程序。

下面是我目前写的、但无法正常工作的代码。数组定义为 *a[linelen]

for(i =0; i < linelen ;i++)
{
    j=0;
    k=0;
    tempstr[j] = input[i]; // move character from input to tempstring
        for(k=0; k< array_size; k++) //search through array
        {
            tempstr[j] = input[i];
            if(*a != tempstr)//(strcmp(a,tempstr)) != 0) // if str not in array
            {
                printf("%s\n", a[0]); //debug
                a[array_size] = tempstr;
                //strcpy(a[array_size], tempstr); //copy str into array
                array_size++;
                memset(tempstr,0,linelen-i); // reset tempstr to empty
                j=0;

            }
            if( *a == tempstr)//(strcmp(a[array_size],tempstr)) == 0)
            {
                j++;
                tempstr[j] = input[i+1];
                if(i != linelen -1) // otherwise if tempstr already in array
                {
                    printf("%s\n",a[0]); //debug
                    j++;
                    tempstr[j] = input[i+1];
                }
                else if (i == linelen -1) // if it is the last letter
                {
                    a[array_size] = tempstr;
                    //strcpy(a[array_size], tempstr); // add to array
                    break;
                }

            }
        }

}


解决方案

下面是一种使用简单字符数组来存储“已见过”的字符串的实现:

#include <stdio.h>

#if 0
#define dbg(_fmt...)        printf(_fmt)
#else
#define dbg(_fmt...)        /**/
#endif

// NOTE: could be char * and realloc if necessary
char seen[5000];

// find -- find old string
// RETURNS: 1=found, 0=no match
int
find(char *str)
{
    char *lhs;
    char *rhs;
    int foundflg;

    dbg("find: str='%s'\n",str);

    rhs = str;
    lhs = seen;
    dbg("find: lhs='%s'\n",seen);

    foundflg = 0;
    for (;  lhs < str;  ++lhs, ++rhs) {
        dbg("find: TRY lhs='%s' rhs='%s'\n",lhs,rhs);

        if (*lhs != *rhs) {
            dbg("find: SKIP\n");
            for (;  *lhs != 0;  ++lhs);
            rhs = str - 1;
            continue;
        }

        if ((*lhs == 0) && (*rhs == 0)) {
            dbg("find: MATCH\n");
            foundflg = 1;
            break;
        }

        if (*rhs == 0)
            break;
    }

    return foundflg;
}

void
sepstr(const char *inp)
{
    int chr;
    char *lhs;
    char *rhs;
    int finflg;

    lhs = seen;
    rhs = seen;
    finflg = 0;

    for (chr = *inp;  chr != 0;  chr = *++inp) {
        *rhs++ = chr;
        *rhs = 0;

        if (find(lhs)) {
            finflg = 1;
            continue;
        }

        printf("%s\n",lhs);
        lhs = ++rhs;
        finflg = 0;
    }

    if (finflg)
        printf("%s\n",lhs);
}

int
main(int argc,char **argv)
{

#if 1
    sepstr("ABCABCABCABC");
#else
    sepstr("ABCABCABCABCABC");
#endif
}

下面是第二种实现方式:

#include <stdio.h>

char out[500];

#ifdef BIG
#define SEEN 256
#else
#define SEEN (26 + 1)
#endif

char seen[SEEN][SEEN];

void
sepstr(const char *inp)
{
    int chr;
    char *prv;
    char *rhs;

    prv = seen[0];

    rhs = out;
    for (chr = *inp;  chr != 0;  chr = *++inp) {
        *rhs++ = chr;

#ifndef BIG
        chr = (chr - 'A') + 1;
#endif

        if (prv[chr]) {
            prv = seen[chr];
            continue;
        }

        *rhs = 0;
        printf("%s\n",out);

        prv[chr] = 1;
        rhs = out;
        prv = seen[0];
    }

    if (rhs > out) {
        *rhs = 0;
        printf("%s\n",out);
    }
}

int
main(void)
{

#if 1
    sepstr("ABCABCABCABC");
#else
    sepstr("ABCABCABCABCABC");
#endif

    return 0;
}

下面是每个人的程序的一些基准测试结果(时间单位为纳秒,printf 已被替换为空操作):

        首次       最小值 作者
         527          137 craig1 -- 原始版 -- 使用单个 seen 字符数组
         146           39 craig2 -- 修改版 -- 使用二维 seen 表
       45234        45234 felix1 -- 原始版 -- 可能只能执行一次
       40460          656 felix2 -- 使用固定输入
          24           18 machine1 -- 原始版 -- 在栈上使用 buffer[20][20]
         908          417 machine2 -- 修改版 -- 使用全局 buffer[20][20]
       43089         1120 milevyo1 -- 原始版
       42719          711 milevyo2 -- parseString 的 tmp 改为栈缓冲区,不使用 malloc
        7957          429 milevyo3 -- NewNode 使用固定池,不使用 malloc
        7457          380 milevyo4 -- 去掉了链表

Basically, I want to know whether it is possible (and if so, how) to read a string from left to right, greedily terminating the current substring and appending it to the result each time a new substring is found. E.g.:

"ABCABCABCABC" would give {"A" "B" "C" "AB" "CA" "BC" "ABC"}

I've been trying all day and all I end up with is broken code and crashing programs.

This is what I have that isn't working. The array is defined as *a[linelen]

// Asker's non-working attempt. This is a fragment: no declarations are shown.
// NOTE(review): the text above says the array is defined as *a[linelen] --
// presumably an array of char pointers; the types of tempstr and input are
// not visible here, so the remarks below assume they are char buffers.
for(i =0; i < linelen ;i++)
{
    j=0;
    k=0; 
    tempstr[j] = input[i]; // move character from input to tempstring 
        // k is never used to index a[] inside this loop, and array_size is
        // incremented in the body, so the loop bound moves while iterating.
        for(k=0; k< array_size; k++) //search through array
        {
            tempstr[j] = input[i];
            // Compares a[0] with tempstr using !=; if both are pointers this
            // compares addresses, not string contents (strcmp is commented out).
            if(*a != tempstr)//(strcmp(a,tempstr)) != 0) // if str not in array
            {
                printf("%s\n", a[0]); //debug
                // Stores tempstr itself in the array rather than a copy, so
                // every slot assigned this way refers to the same tempstr.
                a[array_size] = tempstr;
                //strcpy(a[array_size], tempstr); //copy str into array
                array_size++;
                memset(tempstr,0,linelen-i); // reset tempstr to empty
                j=0;

            } 
            // Same a[0]-versus-tempstr comparison as above, with ==.
            if( *a == tempstr)//(strcmp(a[array_size],tempstr)) == 0)
            {
                j++;
                // Runs before the i != linelen -1 check below, so on the last
                // iteration of the outer loop this reads input[linelen].
                tempstr[j] = input[i+1];
                if(i != linelen -1) // otherwise if tempstr already in array
                {
                    printf("%s\n",a[0]); //debug
                    j++;
                    // Writes input[i+1] a second time, one slot further along.
                    tempstr[j] = input[i+1];
                }
                else if (i == linelen -1) // if it is the last letter
                {
                    a[array_size] = tempstr;
                    //strcpy(a[array_size], tempstr); // add to array
                    break;
                }

            }
        }

}

解决方案

Here's one that uses a simple character array to store the "seen" strings:

#include <stdio.h>

// Debug tracing switch: change "#if 0" to "#if 1" to route dbg() to printf.
// Written with the standard C99 variadic form (... / __VA_ARGS__) rather than
// the GNU-only named form "_fmt...", so it builds with any conforming compiler.
// When disabled, dbg() expands to a no-op expression and its arguments are
// not evaluated.
#if 0
#define dbg(...)            printf(__VA_ARGS__)
#else
#define dbg(...)            ((void) 0)
#endif

// Storage for every piece produced so far, laid out back to back: sepstr()
// appends each piece followed by a NUL byte and starts the next piece right
// after it; find() scans the earlier pieces in this buffer.
// NOTE: could be char * and realloc if necessary
char seen[5000];

// find -- check whether the string at str already occurs as an earlier entry
//
// seen[] is walked as a sequence of NUL-terminated entries, from its start up
// to (but not including) str. The pointer comparison against str bounds the
// walk, so str is expected to point into seen[].
//
// RETURNS: 1=found, 0=no match
int
find(char *str)
{
    char *entry = seen;     // cursor inside the earlier entries
    char *probe = str;      // cursor inside the string being looked up

    dbg("find: str='%s'\n",str);
    dbg("find: lhs='%s'\n",seen);

    while (entry < str) {
        dbg("find: TRY lhs='%s' rhs='%s'\n",entry,probe);

        if (*entry == *probe) {
            // both strings ended at the same position: exact match
            if (*entry == 0) {
                dbg("find: MATCH\n");
                return 1;
            }

            ++entry;
            ++probe;
            continue;
        }

        // mismatch: move to the start of the next entry and restart the probe
        dbg("find: SKIP\n");
        while (*entry != 0)
            ++entry;
        ++entry;
        probe = str;
    }

    return 0;
}

// sepstr -- split inp into pieces and print one piece per line
//
// The input is read left to right and the current piece is grown one
// character at a time inside seen[]. As soon as the piece no longer matches
// any earlier piece (per find()), it is printed and the next piece is started
// right after its terminating NUL. If the input ends while the current piece
// still matches an earlier one, that trailing piece is printed as well.
//
// inp: NUL-terminated input string; NULL is treated like an empty string.
//
// seen[] has a fixed capacity. If the next character would not fit, a message
// is written to stderr and processing stops instead of writing past the end
// of the buffer.
void
sepstr(const char *inp)
{
    char *const limit = seen + sizeof seen;     // one past the end of seen[]
    char *lhs = seen;                           // start of the current piece
    char *rhs = seen;                           // next write position
    int pending = 0;                            // current piece not printed yet

    if (inp == NULL)
        return;

    for (;  *inp != 0;  ++inp) {
        // each step stores one character plus its terminating NUL
        if (limit - rhs < 2) {
            fprintf(stderr,"sepstr: input too long for seen[]\n");
            break;
        }

        *rhs++ = *inp;
        *rhs = 0;

        if (find(lhs)) {
            pending = 1;
            continue;
        }

        printf("%s\n",lhs);

        // step over the NUL so the finished piece stays searchable
        lhs = ++rhs;
        pending = 0;
    }

    if (pending)
        printf("%s\n",lhs);
}

// Program entry: runs sepstr() on the sample input selected at compile time.
int
main(int argc,char **argv)
{
    // the command line is not used
    (void) argc;
    (void) argv;

#if 1
    sepstr("ABCABCABCABC");
#else
    sepstr("ABCABCABCABCABC");
#endif

    return 0;
}

Here's a second way to do it:

#include <stdio.h>

// Scratch buffer for the piece currently being built; it is NUL-terminated
// only at the moment it is printed.
char out[500];

// Number of rows/columns in the transition table. Without BIG only the
// letters 'A'..'Z' are supported (indices 1..26); with BIG any byte value is.
#ifdef BIG
#define SEEN 256
#else
#define SEEN (26 + 1)
#endif

// seen[p][c] is set once a piece has been printed whose last step went from
// index p to index c. Row 0 stands for "start of a piece". The table is never
// cleared by sepstr(), so its contents carry over between calls.
char seen[SEEN][SEEN];

// sepstr -- split inp into pieces and print one piece per line
//
// Characters are appended to out[] while the step (previous index, current
// index) is already marked in seen[][]. The first unmarked step ends the
// piece: it is printed, the step is marked, and a new piece is started.
// Whatever is left in out[] when the input ends is printed too.
//
// inp: NUL-terminated input string; NULL is treated like an empty string.
//
// Processing stops with a message on stderr (after printing any partial
// piece) when a character has no slot in the table or when the current piece
// would no longer fit in out[]; previously both cases indexed or wrote out of
// bounds.
void
sepstr(const char *inp)
{
    char *const out_end = out + sizeof out - 1;     // keep room for the NUL
    char *prv = seen[0];
    char *rhs = out;

    if (inp == NULL)
        return;

    for (;  *inp != 0;  ++inp) {
        int idx;

#ifdef BIG
        // go through unsigned char so bytes >= 0x80 cannot become negative
        idx = (unsigned char) *inp;
#else
        idx = (*inp - 'A') + 1;
#endif

        if ((idx < 1) || (idx >= SEEN)) {
            fprintf(stderr,"sepstr: unsupported character 0x%02x\n",
                (unsigned int) (unsigned char) *inp);
            break;
        }

        if (rhs >= out_end) {
            fprintf(stderr,"sepstr: piece too long for out[]\n");
            break;
        }

        *rhs++ = *inp;

        if (prv[idx]) {
            prv = seen[idx];
            continue;
        }

        *rhs = 0;
        printf("%s\n",out);

        prv[idx] = 1;
        rhs = out;
        prv = seen[0];
    }

    if (rhs > out) {
        *rhs = 0;
        printf("%s\n",out);
    }
}

// Program entry: runs sepstr() on the sample input selected at compile time.
int
main(void)
{
    // change to "#if 1" to run the longer sample instead
#if 0
    sepstr("ABCABCABCABCABC");
#else
    sepstr("ABCABCABCABC");
#endif

    return 0;
}

Here are some benchmarks for everybody's program (time in ns and printf nop'ed):

       first      minimum author
         527          137 craig1 -- original -- uses single seen char array
         146           39 craig2 -- modified -- uses 2D seen table
       45234        45234 felix1 -- original -- may only be executed once
       40460          656 felix2 -- uses fixed input
          24           18 machine1 -- original -- uses buffer[20][20] on stack
         908          417 machine2 -- modified -- uses global buffer[20][20]
       43089         1120 milevyo1 -- original
       42719          711 milevyo2 -- parseString tmp is stack buffer no malloc
        7957          429 milevyo3 -- NewNode uses fixed pool no malloc
        7457          380 milevyo4 -- removed linked list

这篇关于如何将字符串分隔成独特的字符/字符串数组的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆