一个更好的方法来将字符串分割成C / C字符串数组++使用空格作为分隔符 [英] A better way to split a string into an array of strings in C/C++ using whitespace as a delimiter
问题描述
对不起,我的C / C ++并不好,但下面的现有code看起来像垃圾,甚至给我。它也有一个错误 - 当海峡=07/02/2010通过'\\ 0'终止失败 - 。我认为这不是一个固定的bug,那还不如重写。在Python它仅仅是'KAS \\ nhjkfh kjsdjkasf'.split()
。我知道这是C-ISH code,但它不能是复杂的分割线!坚持同样的签名,而无需使用额外的库,我怎么能改善它 - 让短暂的甜蜜?我可以告诉大家,这个code气味,例如由于else子句一路结尾。
LINE失败:
_tcsncpy_s(
s.GetBuffer((INT)(参数nIndex-n上次))
参数nIndex-n上次,
PSZ + n上次,
(为size_t)(参数nIndex-n上次)
);
通过字符串07/02/2010的终止'\\ 0',它会尝试为11个字符写入缓冲区是只长10个字符。
全部功能:
的#define//这将返回文本字符串作为一个字符串数组
//该功能从SetControlText称为解析
//文本字符串转换CStrings数组,该控制
//小工具将尝试间preTBOOL CLVGridDateTimeCtrl :: ParseTextWithCurrentFormat(常量的CString&放大器;海峡,常量CGXStyle * pOldStyle,CStringArray的&安培; strArray)
{
// 没用过:
pOldStyle; //我们假设显著段由空间分隔 //请更改m_strDelim添加其他分隔符 C字符串; LPCTSTR PSZ =(LPCTSTR)STR; BOOL bLastCharSpace = FALSE;
DWORD大小= str.GetLength()+ 1; //(新行会开始一个新行,的分隔符会
//移到下一列)。
//解析缓冲区(DBCS知道)
为(DWORD参数nIndex = 0,n上次= 0;参数nIndex<大小;参数nIndex + = _tclen(PSZ +参数nIndex))
{
//检查一个分隔符
如果(PSZ [参数nIndex] == _T('\\ 0')|| _tcschr(_T(\\ r \\ n),PSZ [参数nIndex])|| _tcschr(_T(),PSZ [参数nIndex])
!|| _tcscspn(安培; PSZ [参数nIndex],(LPCTSTR)m_strDelim))
{
s.ReleaseBuffer();
s.Empty();
//中断不作解析,如果下一个字符字符串
//是结束串
如果(PSZ [参数nIndex] == _T('\\ 0'))
{
如果(PSZ [参数nIndex] == _T('\\ r')及和放大器; PSZ [参数nIndex + 1] == _T('\\ n'))
参数nIndex ++; _tcsncpy_s(s.GetBuffer((INT)(参数nIndex-n上次))
参数nIndex-n上次,
PSZ + n上次,
(为size_t)(参数nIndex-n上次));
CString的temStr = S;
strArray.Add(temStr);
temStr.Empty();
打破;
} 否则,如果(_tcscspn(安培; PSZ [参数nIndex],(LPCTSTR)m_strDelim)== 0安培;&安培;!bLastCharSpace)
{
如果(PSZ [参数nIndex] == _T('\\ r')及和放大器; PSZ [参数nIndex + 1] == _T('\\ n'))
参数nIndex ++; _tcsncpy_s(s.GetBuffer((INT)(参数nIndex-n上次))
参数nIndex-n上次,
PSZ + n上次,
(为size_t)(参数nIndex-n上次));
CString的temStr = S;
strArray.Add(temStr);
temStr.Empty();
bLastCharSpace = TRUE;
//中断不作解析,如果下一个字符字符串
//是结束串
如果(PSZ [参数nIndex + 1] == _T('\\ 0'))
打破; }
//现在,该值已经被拷贝到细胞内,
//让我们检查,如果我们要跳转到一个新行。
否则如果(_tcschr(_T(),PSZ [参数nIndex])及&放大器;!bLastCharSpace)
{
如果(PSZ [参数nIndex] == _T('\\ r')及和放大器; PSZ [参数nIndex + 1] == _T('\\ n'))
参数nIndex ++; _tcsncpy_s(s.GetBuffer((INT)(参数nIndex-n上次))
参数nIndex-n上次,
PSZ + n上次,
(为size_t)(参数nIndex-n上次));
CString的temStr = S;
strArray.Add(temStr);
temStr.Empty();
bLastCharSpace = TRUE;
//中断不作解析,如果下一个字符字符串
//是结束串
如果(PSZ [参数nIndex + 1] == _T('\\ 0'))
打破;
} n上次=参数nIndex + _tclen(PSZ +参数nIndex);
}
其他
{
// n上次=参数nIndex + _tclen(PSZ +参数nIndex);
bLastCharSpace = FALSE;
}
}
如果(strArray.GetSize())
返回TRUE;
其他
返回FALSE;
}
的 修改的 m_strDelim = _T();
这个成员变量只在这个函数中使用。我想我现在看到符号化的地步 - 它试图解析日期和时间...等待,还有更多!这里是code这下面调用此函数。请帮我改善这一点。我的一些同事声称,C#使他们不会比C ++更高效。我曾经觉得自己不能够说我一样白痴。
// SetControlText将尝试用文字先转换为有效日期
//的COleDateTime的帮助下,然后用日期控件的帮助和
//当前格式BOOL CLVGridDateTimeCtrl :: ConvertControlTextToValue(CString的放大器; STR,ROWCOL nRow,ROWCOL NCOL,常量CGXStyle * pOldStyle)
{
CGXStyle * PSTYLE = NULL;
BOOL bSuccess = FALSE; 如果(pOldStyle == NULL)
{
PSTYLE =网格() - GT; CreateStyle();
网格() - GT; ComposeStyleRowCol(nRow,NCOL,PSTYLE);
pOldStyle = PSTYLE;
} //只允许有效的输入
{
//首先做到这一点
CLVDateTime DT; 如果(str.IsEmpty())
{
;
//如果(网格() - GT; IsCurrentCell(nRow,NCOL))
// 重启();
bSuccess = TRUE;
}
否则如果(dt.ParseDateTime(STR,CLVGlobals :: IsUSDateFormat())及&放大器;!(DATE)dt的= 0)
{
SetDateTime(DT);
如果(m_bDateValueAsNumber)
str.Format(_T(%G),(DATE)DT);
其他
海峡= dt.Format();
bSuccess = TRUE;
}
其他
{
//使用当前格式解析字符串
CStringArray的strArray;
如果(!ParseTextWithCurrentFormat(STR,pOldStyle,strArray))
返回FALSE; UpdateNullStatus(m_TextCtrlWnd); 和setFormat(m_TextCtrlWnd,* pOldStyle); INT nArrIndex = 0;
的for(int i = 0; I< m_TextCtrlWnd.m_gadgets.GetSize();我++)
{
INT VAL = m_TextCtrlWnd.m_gadgets [Ⅰ] - GT;的GetValue();
// s.Empty();
如果(m_TextCtrlWnd.m_gadgets [Ⅰ] - GT; IsKindOf(RUNTIME_CLASS(SECDTNumericGadget)))
{
// TRACE(_T(值%S \\ n),strArray [nArrIndex]);
((CLVDTNumericGadget *)m_TextCtrlWnd.m_gadgets [I]) - GT; m_nNewValue = _ttoi(strArray [nArrIndex]);
nArrIndex ++;
如果(nArrIndex> strArray.GetUpperBound())
打破;
}
否则如果(m_TextCtrlWnd.m_gadgets [Ⅰ] - GT; IsKindOf(RUNTIME_CLASS(SECDTListGadget))及&放大器;!VAL = - 1)
{
国际nIndex =((CLVDTListGadget *)m_TextCtrlWnd.m_gadgets [I]) - > FindMatch(strArray [nArrIndex],((CLVDTListGadget *)m_TextCtrlWnd.m_gadgets [I]) - >的GetValue()+ 1);
如果(参数nIndex!= - 1)
{
// TRACE(_T(值%S \\ n),strArray [nArrIndex]);
((CLVDTListGadget *)m_TextCtrlWnd.m_gadgets [I]) - GT;的SetValue(参数nIndex);
nArrIndex ++;
如果(nArrIndex> strArray.GetUpperBound())
打破;
} } CLVDBValue DBDATE = m_TextCtrlWnd.GetDateTime();
如果(dbDate.IsNull())
海峡= _T();
其他
{
CLVDateTime DT =(CLVDateTime)DBDATE;
如果(m_bDateValueAsNumber)
str.Format(_T(%G),(DATE)DT);
其他
海峡= dt.Format();
}
}
bSuccess = TRUE;
}
} 如果(PSTYLE)
网格() - GT; RecycleStyle(PSTYLE); 返回bSuccess;
}
的#include<串GT;
#包括LT&;&双端GT;
#包括strtk.hpp
诠释的main()
{
标准::字符串数据(KAS \\ nhjkfh kjsdjkasf);
的std :: deque的<标准::字符串>为str_list;
strtk ::解析(数据,\\ r \\ n,为str_list);
返回0;
}
更多的例子可以发现这里
Sorry, my C/C++ is not that good, but the following existing code looks like garbage even to me. It also has a bug - fails when str = "07/02/2010" terminated by '\0' - . I think that instead of fixing a bug, it might as well be rewritten. In Python it is just 'kas\nhjkfh kjsdjkasf'.split()
. I know this is C-ish code, but it cannot be that complicated to split a string! Sticking to the same signature, and without using extra libraries, how can I improve it - make it short and sweet? I can tell that this code smells, for instance because of the else clause all the way at the end.
LINE THAT FAILS:
_tcsncpy_s(
s.GetBuffer((int) (nIndex-nLast)),
nIndex-nLast,
psz+nLast,
(size_t) (nIndex-nLast)
);
With the string "07/02/2010" terminated by '\0' it will try to write 11 characters into a buffer that is only 10 characters long.
FULL FUNCTION:
#define
// This will return the text string as a string array
// This function is called from SetControlText to parse the
// text string into an array of CStrings that the control
// Gadgets will attempt to interpret
BOOL CLVGridDateTimeCtrl::ParseTextWithCurrentFormat(const CString& str, const CGXStyle* pOldStyle, CStringArray& strArray )
{
// Unused:
pOldStyle;
// we assume that the significant segments are seperated by space
// Please change m_strDelim to add other delimiters
CString s;
LPCTSTR psz = (LPCTSTR) str;
BOOL bLastCharSpace = FALSE;
DWORD size = str.GetLength()+1;
// (newline will start a new row, tab delimiter will
// move to the next column).
// parse buffer (DBCS aware)
for (DWORD nIndex = 0, nLast = 0; nIndex < size; nIndex += _tclen(psz+nIndex))
{
// check for a delimiter
if (psz[nIndex] == _T('\0') || _tcschr(_T("\r\n"), psz[nIndex]) || _tcschr(_T(" "), psz[nIndex])
||!_tcscspn(&psz[nIndex], (LPCTSTR)m_strDelim))
{
s.ReleaseBuffer();
s.Empty();
// abort parsing the string if next char
// is an end-of-string
if (psz[nIndex] == _T('\0'))
{
if (psz[nIndex] == _T('\r') && psz[nIndex+1] == _T('\n'))
nIndex++;
_tcsncpy_s(s.GetBuffer((int) (nIndex-nLast)),
nIndex-nLast,
psz+nLast,
(size_t) (nIndex-nLast));
CString temStr = s;
strArray.Add(temStr);
temStr.Empty();
break;
}
else if (_tcscspn(&psz[nIndex], (LPCTSTR)m_strDelim) == 0 && !bLastCharSpace)
{
if (psz[nIndex] == _T('\r') && psz[nIndex+1] == _T('\n'))
nIndex++;
_tcsncpy_s(s.GetBuffer((int) (nIndex-nLast)),
nIndex-nLast,
psz+nLast,
(size_t) (nIndex-nLast));
CString temStr = s;
strArray.Add(temStr);
temStr.Empty();
bLastCharSpace = TRUE;
// abort parsing the string if next char
// is an end-of-string
if (psz[nIndex+1] == _T('\0'))
break;
}
// Now, that the value has been copied to the cell,
// let's check if we should jump to a new row.
else if (_tcschr(_T(" "), psz[nIndex]) && !bLastCharSpace)
{
if (psz[nIndex] == _T('\r') && psz[nIndex+1] == _T('\n'))
nIndex++;
_tcsncpy_s(s.GetBuffer((int) (nIndex-nLast)),
nIndex-nLast,
psz+nLast,
(size_t) (nIndex-nLast));
CString temStr = s;
strArray.Add(temStr);
temStr.Empty();
bLastCharSpace = TRUE;
// abort parsing the string if next char
// is an end-of-string
if (psz[nIndex+1] == _T('\0'))
break;
}
nLast = nIndex + _tclen(psz+nIndex);
}
else
{
// nLast = nIndex + _tclen(psz+nIndex);
bLastCharSpace = FALSE;
}
}
if (strArray.GetSize())
return TRUE;
else
return FALSE;
}
EDIT:
m_strDelim = _T(",");
and this member variable is used in this function only. I suppose I see the point of tokenization now - it tries to parse a date and time ... wait, there is more! Here is the code which calls this function below. Please help me improve this as well. Some of my co-workers claim that C# makes them no more productive than C++. I used to feel like an idiot for not being able to say the same about me.
// SetControlText will attempt to convert the text to a valid date first with
// the help of COleDateTime and then with the help of the Date control and the
// current format
BOOL CLVGridDateTimeCtrl::ConvertControlTextToValue(CString& str, ROWCOL nRow, ROWCOL nCol, const CGXStyle* pOldStyle)
{
CGXStyle* pStyle = NULL;
BOOL bSuccess = FALSE;
if (pOldStyle == NULL)
{
pStyle = Grid()->CreateStyle();
Grid()->ComposeStyleRowCol(nRow, nCol, pStyle);
pOldStyle = pStyle;
}
// allow only valid input
{
// First do this
CLVDateTime dt;
if (str.IsEmpty())
{
;
// if (Grid()->IsCurrentCell(nRow, nCol))
// Reset();
bSuccess = TRUE;
}
else if (dt.ParseDateTime(str,CLVGlobals::IsUSDateFormat()) && (DATE) dt != 0)
{
SetDateTime(dt);
if (m_bDateValueAsNumber)
str.Format(_T("%g"), (DATE) dt);
else
str = dt.Format();
bSuccess = TRUE;
}
else
{
// parse the string using the current format
CStringArray strArray;
if (!ParseTextWithCurrentFormat(str, pOldStyle, strArray))
return FALSE;
UpdateNullStatus(m_TextCtrlWnd);
SetFormat(m_TextCtrlWnd, *pOldStyle);
int nArrIndex = 0;
for(int i=0; i<m_TextCtrlWnd.m_gadgets.GetSize(); i++)
{
int val = m_TextCtrlWnd.m_gadgets[i]->GetValue();
// s.Empty();
if(m_TextCtrlWnd.m_gadgets[i]->IsKindOf(RUNTIME_CLASS(SECDTNumericGadget)))
{
// TRACE(_T("The value %s\n"), strArray[nArrIndex]);
((CLVDTNumericGadget*)m_TextCtrlWnd.m_gadgets[i])->m_nNewValue = _ttoi(strArray[nArrIndex]);
nArrIndex++;
if (nArrIndex>strArray.GetUpperBound())
break;
}
else if(m_TextCtrlWnd.m_gadgets[i]->IsKindOf(RUNTIME_CLASS(SECDTListGadget)) && val!=-1)
{
int nIndex = ((CLVDTListGadget*)m_TextCtrlWnd.m_gadgets[i])->FindMatch(strArray[nArrIndex], ((CLVDTListGadget*)m_TextCtrlWnd.m_gadgets[i])->GetValue()+1);
if (nIndex!=-1)
{
// TRACE(_T("The value %s\n"), strArray[nArrIndex]);
((CLVDTListGadget*)m_TextCtrlWnd.m_gadgets[i])->SetValue(nIndex);
nArrIndex++;
if (nArrIndex>strArray.GetUpperBound())
break;
}
}
CLVDBValue dbDate = m_TextCtrlWnd.GetDateTime();
if (dbDate.IsNull())
str = _T("");
else
{
CLVDateTime dt = (CLVDateTime)dbDate;
if (m_bDateValueAsNumber)
str.Format(_T("%g"), (DATE) dt);
else
str = dt.Format();
}
}
bSuccess = TRUE;
}
}
if (pStyle)
Grid()->RecycleStyle(pStyle);
return bSuccess;
}
The String Toolkit Library (Strtk) has the following solution to your problem:
#include <string>
#include <deque>
#include "strtk.hpp"
int main()
{
std::string data("kas\nhjkfh kjsdjkasf");
std::deque<std::string> str_list;
strtk::parse(data, ", \r\n", str_list);
return 0;
}
More examples can be found Here
这篇关于一个更好的方法来将字符串分割成C / C字符串数组++使用空格作为分隔符的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!