在 C/C++ 中以空白字符为分隔符将字符串拆分为字符串数组的更好方法 [英] A better way to split a string into an array of strings in C/C++ using whitespace as a delimiter

查看：458 发布时间：2016/8/17 23:21:40 c++ c string

本文介绍了在 C/C++ 中以空白字符为分隔符将字符串拆分为字符串数组的更好方法，对大家解决问题具有一定的参考价值，需要的朋友们下面随着小编来一起学习吧！

问题描述

抱歉，我的 C/C++ 不算好，但下面这段现有代码连我都觉得像垃圾。它还有一个 bug——当 str = "07/02/2010"（以 '\0' 结尾）时会失败。我认为与其修这个 bug，不如直接重写。在 Python 中只需要 'kas\nhjkfh kjsdjkasf'.split()。我知道这是 C 风格的代码，但拆分一个字符串不应该这么复杂！在保持函数签名不变、且不使用额外库的前提下，我该如何改进它，让它简短而优雅？我能看出这段代码有坏味道，例如那个一直拖到最后的 else 子句。

出错的代码行：

_tcsncpy_s(
    s.GetBuffer((int) (nIndex-nLast)),
    nIndex-nLast,
    psz+nLast,
    (size_t) (nIndex-nLast)
);

对于以 '\0' 结尾的字符串 "07/02/2010"，它会尝试向只有 10 个字符长的缓冲区写入 11 个字符。

完整函数：

#define 

// This will return the text string as a string array
// This function is called from SetControlText to parse the
// text string into an array of CStrings that the control
// Gadgets will attempt to interpret

BOOL CLVGridDateTimeCtrl::ParseTextWithCurrentFormat(const CString& str, const CGXStyle* pOldStyle, CStringArray& strArray )
{
    // Unused:
    pOldStyle;

    // we assume that the significant segments are separated by space

    // Please change m_strDelim to add other delimiters

    CString s;

    LPCTSTR psz = (LPCTSTR) str;

    BOOL bLastCharSpace = FALSE;
    DWORD size = str.GetLength()+1;

    // (newline will start a new row, tab delimiter will
    // move to the next column).
    // parse buffer (DBCS aware)
    for (DWORD nIndex = 0, nLast = 0; nIndex < size; nIndex += _tclen(psz+nIndex))
    {
        // check for a delimiter
        if (psz[nIndex] == _T('\0') || _tcschr(_T("\r\n"), psz[nIndex]) || _tcschr(_T(" "), psz[nIndex])
            ||!_tcscspn(&psz[nIndex], (LPCTSTR)m_strDelim))
        {
            s.ReleaseBuffer();
            s.Empty();
            // abort parsing the string if next char
            // is an end-of-string
            if (psz[nIndex] == _T('\0'))
            {
                if (psz[nIndex] == _T('\r') && psz[nIndex+1] == _T('\n'))
                    nIndex++;

                _tcsncpy_s(s.GetBuffer((int) (nIndex-nLast)),
                    nIndex-nLast,
                            psz+nLast,
                            (size_t) (nIndex-nLast));
                CString temStr = s;
                strArray.Add(temStr);
                temStr.Empty();
                break;
            }

            else if (_tcscspn(&psz[nIndex], (LPCTSTR)m_strDelim) == 0 && !bLastCharSpace)
            {
                if (psz[nIndex] == _T('\r') && psz[nIndex+1] == _T('\n'))
                    nIndex++;

                _tcsncpy_s(s.GetBuffer((int) (nIndex-nLast)),
                    nIndex-nLast,
                            psz+nLast,
                            (size_t) (nIndex-nLast));
                CString temStr = s;
                strArray.Add(temStr);
                temStr.Empty();
                bLastCharSpace = TRUE;
                // abort parsing the string if next char
                // is an end-of-string
                if (psz[nIndex+1] == _T('\0'))
                    break;

            }
            // Now, that the value has been copied to the cell,
            // let's check if we should jump to a new row.
            else if (_tcschr(_T(" "), psz[nIndex]) && !bLastCharSpace)
            {
                if (psz[nIndex] == _T('\r') && psz[nIndex+1] == _T('\n'))
                    nIndex++;

                _tcsncpy_s(s.GetBuffer((int) (nIndex-nLast)),
                    nIndex-nLast,
                            psz+nLast,
                            (size_t) (nIndex-nLast));
                CString temStr = s;
                strArray.Add(temStr);
                temStr.Empty();
                bLastCharSpace = TRUE;
                // abort parsing the string if next char
                // is an end-of-string
                if (psz[nIndex+1] == _T('\0'))
                    break;
            }

            nLast = nIndex + _tclen(psz+nIndex);
        }
        else
        {
            // nLast = nIndex + _tclen(psz+nIndex);
            bLastCharSpace = FALSE;
        }
    }
    if (strArray.GetSize())
        return TRUE;
    else
        return FALSE;
}

编辑：m_strDelim = _T(","); 这个成员变量只在这个函数中使用。我想我现在明白分词的意义了——它是在尝试解析日期和时间……等等，还有更多！下面是调用此函数的代码，也请帮我改进它。我的一些同事声称，C# 并没有让他们比用 C++ 时更高效。我过去常常因为自己不能这么说而觉得自己像个白痴。

// SetControlText will attempt to convert the text to a valid date first with
// the help of COleDateTime and then with the help of the Date control and the
// current format

BOOL CLVGridDateTimeCtrl::ConvertControlTextToValue(CString& str, ROWCOL nRow, ROWCOL nCol, const CGXStyle* pOldStyle)
{
    CGXStyle* pStyle = NULL;
    BOOL bSuccess = FALSE;

    if (pOldStyle == NULL)
    {
        pStyle = Grid()->CreateStyle();
        Grid()->ComposeStyleRowCol(nRow, nCol, pStyle);
        pOldStyle = pStyle;
    }

    // allow only valid input
    {
        // First do this
        CLVDateTime dt;

        if (str.IsEmpty())
        {
            ;
            // if (Grid()->IsCurrentCell(nRow, nCol))
            //  Reset();
            bSuccess = TRUE;
        }
        else if (dt.ParseDateTime(str,CLVGlobals::IsUSDateFormat()) && (DATE) dt != 0)
        {
            SetDateTime(dt);
            if (m_bDateValueAsNumber)
                str.Format(_T("%g"), (DATE) dt);
            else
                str = dt.Format();
            bSuccess = TRUE;
        }
        else
        {
            // parse the string using the current format
            CStringArray strArray;
            if (!ParseTextWithCurrentFormat(str, pOldStyle, strArray))
                return FALSE;

            UpdateNullStatus(m_TextCtrlWnd);

            SetFormat(m_TextCtrlWnd, *pOldStyle);

            int nArrIndex = 0;
            for(int i=0; i<m_TextCtrlWnd.m_gadgets.GetSize(); i++)
            {
                int val = m_TextCtrlWnd.m_gadgets[i]->GetValue();
                // s.Empty();
                if(m_TextCtrlWnd.m_gadgets[i]->IsKindOf(RUNTIME_CLASS(SECDTNumericGadget)))
                {
                    // TRACE(_T("The value %s\n"), strArray[nArrIndex]);
                    ((CLVDTNumericGadget*)m_TextCtrlWnd.m_gadgets[i])->m_nNewValue = _ttoi(strArray[nArrIndex]);
                    nArrIndex++;
                    if (nArrIndex>strArray.GetUpperBound())
                            break;
                }
                else if(m_TextCtrlWnd.m_gadgets[i]->IsKindOf(RUNTIME_CLASS(SECDTListGadget)) && val!=-1)
                {
                    int nIndex = ((CLVDTListGadget*)m_TextCtrlWnd.m_gadgets[i])->FindMatch(strArray[nArrIndex], ((CLVDTListGadget*)m_TextCtrlWnd.m_gadgets[i])->GetValue()+1);
                    if (nIndex!=-1)
                    {
                        // TRACE(_T("The value %s\n"), strArray[nArrIndex]);
                        ((CLVDTListGadget*)m_TextCtrlWnd.m_gadgets[i])->SetValue(nIndex);
                        nArrIndex++;
                        if (nArrIndex>strArray.GetUpperBound())
                            break;
                    }

                }

                CLVDBValue dbDate = m_TextCtrlWnd.GetDateTime();
                if (dbDate.IsNull())
                    str = _T("");
                else
                {
                    CLVDateTime dt = (CLVDateTime)dbDate;
                    if (m_bDateValueAsNumber)
                        str.Format(_T("%g"), (DATE) dt);
                    else
                        str = dt.Format();
                }
            }
            bSuccess = TRUE;
        }
    }

    if (pStyle)
        Grid()->RecycleStyle(pStyle);

    return bSuccess;
}

解决方案

字符串工具包库（Strtk）为你的问题提供了如下解决方案：

#include <string>
#include <deque>
#include "strtk.hpp"
int main()
{
   std::string data("kas\nhjkfh kjsdjkasf");
   std::deque<std::string> str_list;
   strtk::parse(data, ", \r\n", str_list);
   return 0;
}

更多示例可以在这里找到

Sorry, my C/C++ is not that good, but the following existing code looks like garbage even to me. It also has a bug - fails when str = "07/02/2010" terminated by '\0' - . I think that instead of fixing a bug, it might as well be rewritten. In Python it is just 'kas\nhjkfh kjsdjkasf'.split(). I know this is C-ish code, but it cannot be that complicated to split a string! Sticking to the same signature, and without using extra libraries, how can I improve it - make it short and sweet? I can tell that this code smells, for instance because of the else clause all the way at the end.

LINE THAT FAILS:

_tcsncpy_s(
    s.GetBuffer((int) (nIndex-nLast)),
    nIndex-nLast,
    psz+nLast,
    (size_t) (nIndex-nLast)
);

With the string "07/02/2010" terminated by '\0' it will try to write 11 characters into a buffer that is only 10 characters long.

FULL FUNCTION:

#define 

// Splits a text string into its delimiter-separated segments.
//
// ConvertControlTextToValue calls this when the text could not be parsed as
// a date directly; it then feeds the segments to the control's gadgets.
//
// Parameters:
//   str       - text to split.
//   pOldStyle - unused; kept so the signature stays unchanged.
//   strArray  - receives the segments, appended in order of appearance.
//               Existing elements are left in place.
//
// Returns TRUE when strArray holds at least one element afterwards,
// FALSE otherwise.
//
// Segments are separated by space, CR, LF and every character listed in
// m_strDelim. A run of separators counts as one separator, and leading or
// trailing separators are ignored, so no empty segment is ever added.
BOOL CLVGridDateTimeCtrl::ParseTextWithCurrentFormat(const CString& str, const CGXStyle* pOldStyle, CStringArray& strArray )
{
    // Unused:
    (void) pOldStyle;

    // Please change m_strDelim to add other delimiters
    CString strSeparators(_T(" \r\n"));
    strSeparators += (LPCTSTR) m_strDelim;

    // CString::Tokenize copies every segment into a correctly sized CString.
    // The previous hand-written scanner copied with _tcsncpy_s into a buffer
    // whose declared size left no room for the terminating '\0', which made
    // the secure CRT reject the copy (e.g. for "07/02/2010"), and it dropped
    // a segment that was terminated by CR or LF.
    int nPos = 0;
    CString strSegment = str.Tokenize(strSeparators, nPos);
    while (!strSegment.IsEmpty())
    {
        strArray.Add(strSegment);
        // Tokenize returns an empty string once the input is exhausted,
        // which ends the loop before nPos (then -1) is used again.
        strSegment = str.Tokenize(strSeparators, nPos);
    }

    return strArray.GetSize() ? TRUE : FALSE;
}

EDIT: m_strDelim = _T(","); and this member variable is used in this function only. I suppose I see the point of tokenization now - it tries to parse a date and time ... wait, there is more! Here is the code which calls this function below. Please help me improve this as well. Some of my co-workers claim that C# makes them no more productive than C++. I used to feel like an idiot for not being able to say the same about me.

// Converts the text entered for a cell into the string stored as its value.
//
// The text is handled in this order:
//   1. Empty text is accepted unchanged.
//   2. Text that CLVDateTime::ParseDateTime accepts (and that is not the
//      zero DATE) is applied via SetDateTime and str is rewritten from it.
//   3. Otherwise the text is split with ParseTextWithCurrentFormat and the
//      segments are pushed into the gadgets of m_TextCtrlWnd.
//
// Parameters:
//   str        - in: text to convert; out: the resulting value string
//                ("%g" of the DATE when m_bDateValueAsNumber is set,
//                otherwise CLVDateTime::Format()).
//   nRow, nCol - cell coordinates, used only to compose a style when
//                pOldStyle is NULL.
//   pOldStyle  - style of the cell, or NULL to have one composed here.
//
// Returns TRUE on success, FALSE when ParseTextWithCurrentFormat produced
// no segments.
BOOL CLVGridDateTimeCtrl::ConvertControlTextToValue(CString& str, ROWCOL nRow, ROWCOL nCol, const CGXStyle* pOldStyle)
{
    CGXStyle* pStyle = NULL;
    BOOL bSuccess = FALSE;

    // No style supplied: borrow a temporary one from the grid. It must be
    // handed back through RecycleStyle on every path out of this function.
    if (pOldStyle == NULL)
    {
        pStyle = Grid()->CreateStyle();
        Grid()->ComposeStyleRowCol(nRow, nCol, pStyle);
        pOldStyle = pStyle;
    }

    // allow only valid input
    {
        CLVDateTime dt;

        if (str.IsEmpty())
        {
            bSuccess = TRUE;
        }
        else if (dt.ParseDateTime(str,CLVGlobals::IsUSDateFormat()) && (DATE) dt != 0)
        {
            SetDateTime(dt);
            if (m_bDateValueAsNumber)
                str.Format(_T("%g"), (DATE) dt);
            else
                str = dt.Format();
            bSuccess = TRUE;
        }
        else
        {
            // parse the string using the current format
            CStringArray strArray;

            // A failed parse leaves bSuccess at FALSE and falls through to
            // the cleanup below. The former early "return FALSE" here skipped
            // RecycleStyle and leaked the style created above.
            if (ParseTextWithCurrentFormat(str, pOldStyle, strArray))
            {
                UpdateNullStatus(m_TextCtrlWnd);

                SetFormat(m_TextCtrlWnd, *pOldStyle);

                // Index of the next unused segment; strArray is non-empty
                // here because ParseTextWithCurrentFormat returned TRUE.
                int nArrIndex = 0;
                for (int i = 0; i < m_TextCtrlWnd.m_gadgets.GetSize(); i++)
                {
                    int val = m_TextCtrlWnd.m_gadgets[i]->GetValue();

                    if (m_TextCtrlWnd.m_gadgets[i]->IsKindOf(RUNTIME_CLASS(SECDTNumericGadget)))
                    {
                        ((CLVDTNumericGadget*)m_TextCtrlWnd.m_gadgets[i])->m_nNewValue = _ttoi(strArray[nArrIndex]);
                        nArrIndex++;
                        // Stop once every segment has been consumed.
                        if (nArrIndex > strArray.GetUpperBound())
                            break;
                    }
                    else if (m_TextCtrlWnd.m_gadgets[i]->IsKindOf(RUNTIME_CLASS(SECDTListGadget)) && val != -1)
                    {
                        int nMatch = ((CLVDTListGadget*)m_TextCtrlWnd.m_gadgets[i])->FindMatch(strArray[nArrIndex], ((CLVDTListGadget*)m_TextCtrlWnd.m_gadgets[i])->GetValue()+1);
                        // The segment is consumed only when the list gadget
                        // recognises it; otherwise the next gadget sees it.
                        if (nMatch != -1)
                        {
                            ((CLVDTListGadget*)m_TextCtrlWnd.m_gadgets[i])->SetValue(nMatch);
                            nArrIndex++;
                            if (nArrIndex > strArray.GetUpperBound())
                                break;
                        }
                    }

                    // NOTE(review): str is refreshed inside the loop, so the
                    // "break" statements above skip the refresh for the
                    // gadget that consumed the last segment. This may have
                    // been meant to run once after the loop - confirm before
                    // moving it.
                    CLVDBValue dbDate = m_TextCtrlWnd.GetDateTime();
                    if (dbDate.IsNull())
                        str = _T("");
                    else
                    {
                        CLVDateTime dtCurrent = (CLVDateTime)dbDate;
                        if (m_bDateValueAsNumber)
                            str.Format(_T("%g"), (DATE) dtCurrent);
                        else
                            str = dtCurrent.Format();
                    }
                }
                bSuccess = TRUE;
            }
        }
    }

    if (pStyle)
        Grid()->RecycleStyle(pStyle);

    return bSuccess;
}

解决方案

The String Toolkit Library (Strtk) has the following solution to your problem:

#include <string>
#include <deque>
#include "strtk.hpp"
int main()
{
   // Input text and the container that receives its tokens.
   const std::string input = "kas\nhjkfh kjsdjkasf";
   std::deque<std::string> tokens;

   // Comma, space, CR and LF all act as token separators.
   const std::string separators = ", \r\n";
   strtk::parse(input, separators, tokens);

   return 0;
}

More examples can be found Here

这篇关于在 C/C++ 中以空白字符为分隔符将字符串拆分为字符串数组的更好方法的文章就介绍到这了，希望我们推荐的答案对大家有所帮助，也希望大家多多支持IT屋！

查看全文

在 C/C++ 中以空白字符为分隔符将字符串拆分为字符串数组的更好方法 [英] A better way to split a string into an array of strings in C/C++ using whitespace as a delimiter

问题描述

相关文章

C/C++开发最新文章

热门教程

热门工具

登录关闭

在 C/C++ 中以空白字符为分隔符将字符串拆分为字符串数组的更好方法 [英] A better way to split a string into an array of strings in C/C++ using whitespace as a delimiter

问题描述

相关文章

C/C++开发最新文章

热门教程

热门工具

登录 关闭

登录关闭