如何摆脱XML中的垃圾值? [英] How do I get rid of garbage value in XML?
本文介绍了如何摆脱XML中的垃圾值?的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!
问题描述
您好,
我正在创建一个XML文件并且已成功创建,但在XML文件中,我得到的文字如下:
Re?publican承诺法律将在几个月内废除
这里,Re之后的块是 STX 。请参考以下链接中的图片:
https://pasteboard.co/HlzdUOR.png
我尝试过:
private void btn_CreateArticles_Click(object sender,EventArgs e)
{
this.Cursor = Cursors.WaitCursor;
if(!string.IsNullOrEmpty(StructureFileName))
{
if(System.IO.File.Exists(StructureFileName))
{
string UnpackDirectory = ;
string UnpackFile =;
UnpackDirectory = System.IO.Path.GetDirectoryName(StructureFileName);
UnpackFile = System.IO.Path.GetFileNameWithoutExtension(StructureFileName);
string EpubFolder =;
EpubFolder = UnpackDirectory +\\+ UnpackFile +_ Epub;
if(!(System.IO.Directory.Exists(EpubFolder)))
{
System.IO.Directory.CreateDirectory(EpubFolder);
}
if(!(System.IO.Directory.Exists(EpubFolder +\\OPS)))
{
System.IO.Directory.CreateDirectory( EpubFolder +\\OPS);
}
string StrBooKTitle =;
StrBooKTitle = txt_BookTitle.Text;
if(Regex.IsMatch(StrBooKTitle,\ n+|+\ r,RegexOptions.Multiline))
{
StrBooKTitle = Regex.Replace(StrBooKTitle, \ n+|+\ r,,RegexOptions.Multiline);
}
//在StrBookTitle上使用Replacer函数
if(!string.IsNullOrEmpty(StrBooKTitle.Trim('')))
{
if(DataGridView1.RowCount> 0)
{
string ArticleHeadStr =;
ArticleHeadStr = ArticleHeadStr +<?xml version = \1.0 \encoding = \utf-8 \standalone = \no \?> + Environment.NewLine;
ArticleHeadStr = ArticleHeadStr +<!DOCTYPE html PUBLIC \ - // W3C // DTD XHTML 1.1 // EN \\http://www.w3.org/TR/xhtml11/ DTD / xhtml11.dtd\ > 中+ Environment.NewLine;
ArticleHeadStr = ArticleHeadStr +< html xmlns:saxon = \http://saxon.sf.net/\xmlns = \http://www.w3.org/1999/xhtml \xmlns:nitf = \http://www.nytimes.com/applicationdata/xml/nitf-3-3.dtd\> + Environment.NewLine;
ArticleHeadStr = ArticleHeadStr +< head> + Environment.NewLine;
ArticleHeadStr = ArticleHeadStr +< title> + StrBooKTitle +< / title> + Environment.NewLine;
ArticleHeadStr = ArticleHeadStr +< link rel = \stylesheet \href = \css / TablesAndFloats.css \type = \text / css \/> + Environment.NewLine;
ArticleHeadStr = ArticleHeadStr +< / head> + Environment.NewLine;
ArticleHeadStr = ArticleHeadStr +< body> + Environment.NewLine;
ArticleHeadStr = ArticleHeadStr +< div class = \clean\/> + Environment.NewLine;
ArticleHeadStr = ArticleHeadStr +< div id = \header\class = \headerhead \> + Environment.NewLine;
ArticleHeadStr = ArticleHeadStr +< div class = \masthead-text \> + Environment.NewLine;
ArticleHeadStr = ArticleHeadStr +< div id = \header_title \class = \masthead-section \> + Environment.NewLine;
ArticleHeadStr = ArticleHeadStr +< / div> + Environment.NewLine;
ArticleHeadStr = ArticleHeadStr +< / div> + Environment.NewLine;
ArticleHeadStr = ArticleHeadStr +< / div> + Environment.NewLine;
ArticleHeadStr = ArticleHeadStr +< div> + Environment.NewLine;
string ArticleFootStr =;
ArticleFootStr = Environment.NewLine +< / div> + Environment.NewLine +< / body> + Environment.NewLine +< / html>;
string TempArticleHeadStr =;
string TempArticleStr =;
string TempArticleFileName =;
int ArticleGreaterFileName = 0;
for(var irow = 0; irow< DataGridView1.RowCount; irow ++)
{
if(DataGridView1.Rows [irow] .Cells [0] .Value!= null)
{
if((Encoding.Unicode.GetByteCount(TempArticleStr)/ 1024.0)> 270)
{
TempArticleStr = ArticleHeadStr + TempArticleStr + ArticleFootStr;
ArticleGreaterFileName = ArticleGreaterFileName + 1;
string STempArticleFileName = TempArticleFileName.Replace(。xml,ArticleGreaterFileName +。xml);
TempArticleStr = EssentialTextReplace(TempArticleStr).ToString();
Common.WriteFile(STempArticleFileName,TempArticleStr);
TempArticleStr =;
}
if((string)DataGridView1.Rows [irow] .Cells [2] .Value ==article-full-headline)
{
if(!string .IsNullOrEmpty(TempArticleStr))
{
TempArticleStr = TempArticleHeadStr + TempArticleStr + ArticleFootStr;
// EssentialTextReplace
TempArticleStr = EssentialTextReplace(TempArticleStr).ToString();
if(ArticleGreaterFileName == 0)
{
Common.WriteFile(TempArticleFileName,TempArticleStr);
}
else
{
ArticleGreaterFileName = ArticleGreaterFileName + 1;
string STempArticleFileName = TempArticleFileName.Replace(。xml,ArticleGreaterFileName +。xml);
Common.WriteFile(STempArticleFileName,TempArticleStr);
}
TempArticleStr =;
ArticleGreaterFileName = 0;
}
TempArticleFileName = EpubFolder +\\OPS\\article_+ Convert.ToString(DataGridView1.Rows [irow] .Cells [0] .Value).Trim('') + - + Convert.ToString(DataGridView1.Rows [irow] .Cells [1] .Value).Trim('')+。xml;
TempArticleHeadStr = ArticleHeadStr;
if((string)DataGridView1.Rows [irow] .Cells [4] .Value!=)
{
TempArticleHeadStr = TempArticleHeadStr.Replace(@@@,Convert。 ToString(DataGridView1.Rows [irow] .Cells [4] .Value).Trim(''));
}
if((string)DataGridView1.Rows [irow] .Cells [3] .Value!=)
{
if(string.IsNullOrEmpty(TempArticleStr))
{
TempArticleStr = TempArticleStr + Convert.ToString(DataGridView1.Rows [irow] .Cells [3] .Value).Trim('');
}
其他
{
TempArticleStr = TempArticleStr + Environment.NewLine + Convert.ToString(DataGridView1.Rows [irow] .Cells [3] .Value).Trim('' );
}
}
}
else if((string)DataGridView1.Rows [irow] .Cells [2] .Value ==sectionName)
{
继续;
}
else if((string)DataGridView1.Rows [irow] .Cells [2] .Value ==articleImageCaption)
{
var test = true;
if((string)DataGridView1.Rows [irow] .Cells [3] .Value!=)
{
string Tstr = Convert.ToString(DataGridView1.Rows [irow]。单元格[3] .Value).Trim('');
string tsearchStr =@+ DataGridView1.Rows [irow] .Cells [0] .Value +_+ DataGridView1.Rows [irow] .Cells [10] .Value +_ caption;
if(TempArticleStr.Contains(tsearchStr))
{
TempArticleStr = TempArticleStr.Replace(tsearchStr,Tstr);
}
}
}
其他
{
if((string)DataGridView1.Rows [irow] .Cells [3] .Value!= )
{
if(string.IsNullOrEmpty(TempArticleStr))
{
TempArticleStr = TempArticleStr + Convert.ToString(DataGridView1.Rows [irow] .Cells [3] .Value ).Trim('');
}
其他
{
TempArticleStr = TempArticleStr + Environment.NewLine + Convert.ToString(DataGridView1.Rows [irow] .Cells [3] .Value).Trim('' );
}
}
}
}
}
if(!string.IsNullOrEmpty(TempArticleStr))
{
TempArticleStr = TempArticleHeadStr + TempArticleStr + ArticleFootStr;
TempArticleStr = EssentialTextReplace(TempArticleStr).ToString();
if(ArticleGreaterFileName == 0)
{
Common.WriteFile(TempArticleFileName,TempArticleStr);
}
else
{
ArticleGreaterFileName = ArticleGreaterFileName + 1;
string STempArticleFileName = TempArticleFileName.Replace(。xml,ArticleGreaterFileName +。xml);
Common.WriteFile(STempArticleFileName,TempArticleStr);
}
TempArticleStr =;
ArticleGreaterFileName = 0;
}
}
btn_CreateArticles.ForeColor = System.Drawing.Color.Red;
MessageBox.Show(Completed Articles,Epub Articles,MessageBoxButtons.OK,MessageBoxIcon.Information);
}
else
{
MessageBox.Show(Book Title Blank,Error Epub Articles,MessageBoxButtons.OK,MessageBoxIcon.Error);
}
}
else
{
MessageBox.Show(结构文件路径不正确,错误电子文章,MessageBoxButtons.OK,MessageBoxIcon.Error) ;
}
}
其他
{
MessageBox.Show(未加载结构文件,错误Epub文章,MessageBoxButtons.OK,MessageBoxIcon.Error);
}
this.Cursor = Cursors.Default;
}
解决方案
STX是控制角色 [ ^ ] 。您可以从字符串中删除它们:
var stringWithoutControlCharacters = new string(originalString.Where(c =>!char.IsControl) (C))ToArray的());
Hello,
I am creating an XML file and it is successfully being created, but in the XML file, I am getting text like this:
Republican promises that the law will be repealed within months
Here, the block after Re is STX. Please refer to the image in the link below:
https://pasteboard.co/HlzdUOR.png
What I have tried:
private void btn_CreateArticles_Click(object sender, EventArgs e) { this.Cursor = Cursors.WaitCursor; if (!string.IsNullOrEmpty(StructureFileName)) { if (System.IO.File.Exists(StructureFileName)) { string UnpackDirectory = ""; string UnpackFile = ""; UnpackDirectory = System.IO.Path.GetDirectoryName(StructureFileName); UnpackFile = System.IO.Path.GetFileNameWithoutExtension(StructureFileName); string EpubFolder = ""; EpubFolder = UnpackDirectory + "\\" + UnpackFile + "_Epub"; if (!(System.IO.Directory.Exists(EpubFolder))) { System.IO.Directory.CreateDirectory(EpubFolder); } if (!(System.IO.Directory.Exists(EpubFolder + "\\OPS"))) { System.IO.Directory.CreateDirectory(EpubFolder + "\\OPS"); } string StrBooKTitle = ""; StrBooKTitle = txt_BookTitle.Text; if (Regex.IsMatch(StrBooKTitle, "\n" + "|" + "\r", RegexOptions.Multiline)) { StrBooKTitle = Regex.Replace(StrBooKTitle, "\n" + "|" + "\r", "", RegexOptions.Multiline); } //Use Replacer Function here on StrBookTitle if (!string.IsNullOrEmpty(StrBooKTitle.Trim(' '))) { if (DataGridView1.RowCount > 0) { string ArticleHeadStr = ""; ArticleHeadStr = ArticleHeadStr + "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"no\"?>" + Environment.NewLine; ArticleHeadStr = ArticleHeadStr + "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">" + Environment.NewLine; ArticleHeadStr = ArticleHeadStr + "<html xmlns:saxon=\"http://saxon.sf.net/\" xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:nitf=\"http://www.nytimes.com/applicationdata/xml/nitf-3-3.dtd\">" + Environment.NewLine; ArticleHeadStr = ArticleHeadStr + "<head>" + Environment.NewLine; ArticleHeadStr = ArticleHeadStr + "<title>" + StrBooKTitle + "</title>" + Environment.NewLine; ArticleHeadStr = ArticleHeadStr + "<link rel=\"stylesheet\" href=\"css/TablesAndFloats.css\" type=\"text/css\"/>" + Environment.NewLine; ArticleHeadStr = ArticleHeadStr + "</head>" + Environment.NewLine; ArticleHeadStr = ArticleHeadStr + "<body>" + Environment.NewLine; ArticleHeadStr = ArticleHeadStr + "<div class=\"clean\"/>" + Environment.NewLine; ArticleHeadStr = ArticleHeadStr + "<div id=\"header\" class=\"masthead\">" + Environment.NewLine; ArticleHeadStr = ArticleHeadStr + "<div class=\"masthead-text\">" + Environment.NewLine; ArticleHeadStr = ArticleHeadStr + "<div id=\"header_title\" class=\"masthead-section\">" + Environment.NewLine; ArticleHeadStr = ArticleHeadStr + "</div>" + Environment.NewLine; ArticleHeadStr = ArticleHeadStr + "</div>" + Environment.NewLine; ArticleHeadStr = ArticleHeadStr + "</div>" + Environment.NewLine; ArticleHeadStr = ArticleHeadStr + "<div>" + Environment.NewLine; string ArticleFootStr = ""; ArticleFootStr = Environment.NewLine + "</div>" + Environment.NewLine + "</body>" + Environment.NewLine + "</html>"; string TempArticleHeadStr = ""; string TempArticleStr = ""; string TempArticleFileName = ""; int ArticleGreaterFileName = 0; for (var irow = 0; irow < DataGridView1.RowCount; irow++) { if (DataGridView1.Rows[irow].Cells[0].Value != null) { if ((Encoding.Unicode.GetByteCount(TempArticleStr) / 1024.0) > 270) { TempArticleStr = ArticleHeadStr + TempArticleStr + ArticleFootStr; ArticleGreaterFileName = ArticleGreaterFileName + 1; string STempArticleFileName = TempArticleFileName.Replace(".xml", ArticleGreaterFileName + ".xml"); TempArticleStr = EssentialTextReplace(TempArticleStr).ToString(); Common.WriteFile(STempArticleFileName, TempArticleStr); TempArticleStr = ""; } if ((string)DataGridView1.Rows[irow].Cells[2].Value == "article-full-headline") { if (!string.IsNullOrEmpty(TempArticleStr)) { TempArticleStr = TempArticleHeadStr + TempArticleStr + ArticleFootStr; //EssentialTextReplace TempArticleStr = EssentialTextReplace(TempArticleStr).ToString(); if (ArticleGreaterFileName == 0) { Common.WriteFile(TempArticleFileName, TempArticleStr); } else { ArticleGreaterFileName = ArticleGreaterFileName + 1; string STempArticleFileName = TempArticleFileName.Replace(".xml", ArticleGreaterFileName + ".xml"); Common.WriteFile(STempArticleFileName, TempArticleStr); } TempArticleStr = ""; ArticleGreaterFileName = 0; } TempArticleFileName = EpubFolder + "\\OPS\\article_" + Convert.ToString(DataGridView1.Rows[irow].Cells[0].Value).Trim(' ') + "-" + Convert.ToString(DataGridView1.Rows[irow].Cells[1].Value).Trim(' ') + ".xml"; TempArticleHeadStr = ArticleHeadStr; if ((string)DataGridView1.Rows[irow].Cells[4].Value != "") { TempArticleHeadStr = TempArticleHeadStr.Replace("@@@", Convert.ToString(DataGridView1.Rows[irow].Cells[4].Value).Trim(' ')); } if ((string)DataGridView1.Rows[irow].Cells[3].Value != "") { if (string.IsNullOrEmpty(TempArticleStr)) { TempArticleStr = TempArticleStr + Convert.ToString(DataGridView1.Rows[irow].Cells[3].Value).Trim(' '); } else { TempArticleStr = TempArticleStr + Environment.NewLine + Convert.ToString(DataGridView1.Rows[irow].Cells[3].Value).Trim(' '); } } } else if ((string)DataGridView1.Rows[irow].Cells[2].Value == "sectionName") { continue; } else if ((string)DataGridView1.Rows[irow].Cells[2].Value == "articleImageCaption") { var test = true; if ((string)DataGridView1.Rows[irow].Cells[3].Value != "") { string Tstr = Convert.ToString(DataGridView1.Rows[irow].Cells[3].Value).Trim(' '); string tsearchStr = "@" + DataGridView1.Rows[irow].Cells[0].Value + "_" + DataGridView1.Rows[irow].Cells[10].Value + "_caption"; if (TempArticleStr.Contains(tsearchStr)) { TempArticleStr = TempArticleStr.Replace(tsearchStr, Tstr); } } } else { if ((string)DataGridView1.Rows[irow].Cells[3].Value != "") { if (string.IsNullOrEmpty(TempArticleStr)) { TempArticleStr = TempArticleStr + Convert.ToString(DataGridView1.Rows[irow].Cells[3].Value).Trim(' '); } else { TempArticleStr = TempArticleStr + Environment.NewLine + Convert.ToString(DataGridView1.Rows[irow].Cells[3].Value).Trim(' '); } } } } } if (!string.IsNullOrEmpty(TempArticleStr)) { TempArticleStr = TempArticleHeadStr + TempArticleStr + ArticleFootStr; TempArticleStr = EssentialTextReplace(TempArticleStr).ToString(); if (ArticleGreaterFileName == 0) { Common.WriteFile(TempArticleFileName, TempArticleStr); } else { ArticleGreaterFileName = ArticleGreaterFileName + 1; string STempArticleFileName = TempArticleFileName.Replace(".xml", ArticleGreaterFileName + ".xml"); Common.WriteFile(STempArticleFileName, TempArticleStr); } TempArticleStr = ""; ArticleGreaterFileName = 0; } } btn_CreateArticles.ForeColor = System.Drawing.Color.Red; MessageBox.Show("Completed Articles", "Epub Articles", MessageBoxButtons.OK, MessageBoxIcon.Information); } else { MessageBox.Show("Book Title Blank", "Error Epub Articles", MessageBoxButtons.OK, MessageBoxIcon.Error); } } else { MessageBox.Show("Structure File Path Not Correct", "Error Epub Articles", MessageBoxButtons.OK, MessageBoxIcon.Error); } } else { MessageBox.Show("Structure File Not Loaded", "Error Epub Articles", MessageBoxButtons.OK, MessageBoxIcon.Error); } this.Cursor = Cursors.Default; }
解决方案
STX is a control character[^]. You can remove those from a string like so:
var stringWithoutControlCharacters = new string(originalString.Where(c => !char.IsControl(c)).ToArray());
这篇关于如何摆脱XML中的垃圾值?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!
查看全文