如何摆脱XML中的垃圾值? [英] How do I get rid of garbage value in XML?

查看:58
本文介绍了如何摆脱XML中的垃圾值?的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

您好,



我正在创建一个XML文件并且已成功创建,但在XML文件中,我得到的文字如下:



 Re?publican承诺法律将在几个月内废除





这里,Re之后的块是 STX 。请参考以下链接中的图片:



https://pasteboard.co/HlzdUOR.png



我尝试过:



 private void btn_CreateArticles_Click(object sender,EventArgs e)
{
this.Cursor = Cursors.WaitCursor;
if(!string.IsNullOrEmpty(StructureFileName))
{
if(System.IO.File.Exists(StructureFileName))
{
string UnpackDirectory = ;
string UnpackFile =;
UnpackDirectory = System.IO.Path.GetDirectoryName(StructureFileName);
UnpackFile = System.IO.Path.GetFileNameWithoutExtension(StructureFileName);
string EpubFolder =;
EpubFolder = UnpackDirectory +\\+ UnpackFile +_ Epub;
if(!(System.IO.Directory.Exists(EpubFolder)))
{
System.IO.Directory.CreateDirectory(EpubFolder);
}
if(!(System.IO.Directory.Exists(EpubFolder +\\OPS)))
{
System.IO.Directory.CreateDirectory( EpubFolder +\\OPS);
}
string StrBooKTitle =;
StrBooKTitle = txt_BookTitle.Text;
if(Regex.IsMatch(StrBooKTitle,\ n+|+\ r,RegexOptions.Multiline))
{
StrBooKTitle = Regex.Replace(StrBooKTitle, \ n+|+\ r,,RegexOptions.Multiline);
}
//在StrBookTitle上使用Replacer函数

if(!string.IsNullOrEmpty(StrBooKTitle.Trim('')))
{
if(DataGridView1.RowCount> 0)
{
string ArticleHeadStr =;
ArticleHeadStr = ArticleHeadStr +<?xml version = \1.0 \encoding = \utf-8 \standalone = \no \?> + Environment.NewLine;
ArticleHeadStr = ArticleHeadStr + "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">" + Environment.NewLine;
ArticleHeadStr = ArticleHeadStr +< html xmlns:saxon = \http://saxon.sf.net/\xmlns = \http://www.w3.org/1999/xhtml \xmlns:nitf = \http://www.nytimes.com/applicationdata/xml/nitf-3-3.dtd\> + Environment.NewLine;
ArticleHeadStr = ArticleHeadStr +< head> + Environment.NewLine;
ArticleHeadStr = ArticleHeadStr +< title> + StrBooKTitle +< / title> + Environment.NewLine;
ArticleHeadStr = ArticleHeadStr +< link rel = \stylesheet \href = \css / TablesAndFloats.css \type = \text / css \/> + Environment.NewLine;
ArticleHeadStr = ArticleHeadStr +< / head> + Environment.NewLine;
ArticleHeadStr = ArticleHeadStr +< body> + Environment.NewLine;
ArticleHeadStr = ArticleHeadStr +< div class = \clean\/> + Environment.NewLine;
ArticleHeadStr = ArticleHeadStr +< div id = \header\class = \headerhead \> + Environment.NewLine;
ArticleHeadStr = ArticleHeadStr +< div class = \masthead-text \> + Environment.NewLine;
ArticleHeadStr = ArticleHeadStr +< div id = \header_title \class = \masthead-section \> + Environment.NewLine;
ArticleHeadStr = ArticleHeadStr +< / div> + Environment.NewLine;
ArticleHeadStr = ArticleHeadStr +< / div> + Environment.NewLine;
ArticleHeadStr = ArticleHeadStr +< / div> + Environment.NewLine;
ArticleHeadStr = ArticleHeadStr +< div> + Environment.NewLine;
string ArticleFootStr =;
ArticleFootStr = Environment.NewLine +< / div> + Environment.NewLine +< / body> + Environment.NewLine +< / html>;
string TempArticleHeadStr =;
string TempArticleStr =;
string TempArticleFileName =;
int ArticleGreaterFileName = 0;
for(var irow = 0; irow< DataGridView1.RowCount; irow ++)
{
if(DataGridView1.Rows [irow] .Cells [0] .Value!= null)
{
if((Encoding.Unicode.GetByteCount(TempArticleStr)/ 1024.0)> 270)
{
TempArticleStr = ArticleHeadStr + TempArticleStr + ArticleFootStr;
ArticleGreaterFileName = ArticleGreaterFileName + 1;
string STempArticleFileName = TempArticleFileName.Replace(。xml,ArticleGreaterFileName +。xml);
TempArticleStr = EssentialTextReplace(TempArticleStr).ToString();
Common.WriteFile(STempArticleFileName,TempArticleStr);
TempArticleStr =;
}
if((string)DataGridView1.Rows [irow] .Cells [2] .Value ==article-full-headline)
{
if(!string .IsNullOrEmpty(TempArticleStr))
{
TempArticleStr = TempArticleHeadStr + TempArticleStr + ArticleFootStr;
// EssentialTextReplace
TempArticleStr = EssentialTextReplace(TempArticleStr).ToString();

if(ArticleGreaterFileName == 0)
{
Common.WriteFile(TempArticleFileName,TempArticleStr);
}
else
{
ArticleGreaterFileName = ArticleGreaterFileName + 1;
string STempArticleFileName = TempArticleFileName.Replace(。xml,ArticleGreaterFileName +。xml);
Common.WriteFile(STempArticleFileName,TempArticleStr);
}
TempArticleStr =;
ArticleGreaterFileName = 0;
}
TempArticleFileName = EpubFolder +\\OPS\\article_+ Convert.ToString(DataGridView1.Rows [irow] .Cells [0] .Value).Trim('') + - + Convert.ToString(DataGridView1.Rows [irow] .Cells [1] .Value).Trim('')+。xml;
TempArticleHeadStr = ArticleHeadStr;
if((string)DataGridView1.Rows [irow] .Cells [4] .Value!=)
{
TempArticleHeadStr = TempArticleHeadStr.Replace(@@@,Convert。 ToString(DataGridView1.Rows [irow] .Cells [4] .Value).Trim(''));
}
if((string)DataGridView1.Rows [irow] .Cells [3] .Value!=)
{
if(string.IsNullOrEmpty(TempArticleStr))
{
TempArticleStr = TempArticleStr + Convert.ToString(DataGridView1.Rows [irow] .Cells [3] .Value).Trim('');
}
else
{
TempArticleStr = TempArticleStr + Environment.NewLine + Convert.ToString(DataGridView1.Rows [irow] .Cells [3] .Value).Trim('' );
}
}
}
else if((string)DataGridView1.Rows [irow] .Cells [2] .Value ==sectionName)
{
continue;
}
else if((string)DataGridView1.Rows [irow] .Cells [2] .Value ==articleImageCaption)
{
var test = true;
if((string)DataGridView1.Rows [irow] .Cells [3] .Value!=)
{
string Tstr = Convert.ToString(DataGridView1.Rows[irow].Cells[3].Value).Trim(' ');
string tsearchStr =@+ DataGridView1.Rows [irow] .Cells [0] .Value +_+ DataGridView1.Rows [irow] .Cells [10] .Value +_ caption;
if(TempArticleStr.Contains(tsearchStr))
{
TempArticleStr = TempArticleStr.Replace(tsearchStr,Tstr);
}
}
}
else
{
if((string)DataGridView1.Rows [irow] .Cells [3] .Value!= )
{
if(string.IsNullOrEmpty(TempArticleStr))
{
TempArticleStr = TempArticleStr + Convert.ToString(DataGridView1.Rows [irow] .Cells [3] .Value ).Trim('');
}
else
{
TempArticleStr = TempArticleStr + Environment.NewLine + Convert.ToString(DataGridView1.Rows [irow] .Cells [3] .Value).Trim('' );
}
}
}
}
}
if(!string.IsNullOrEmpty(TempArticleStr))
{
TempArticleStr = TempArticleHeadStr + TempArticleStr + ArticleFootStr;
TempArticleStr = EssentialTextReplace(TempArticleStr).ToString();

if(ArticleGreaterFileName == 0)
{
Common.WriteFile(TempArticleFileName,TempArticleStr);
}
else
{
ArticleGreaterFileName = ArticleGreaterFileName + 1;
string STempArticleFileName = TempArticleFileName.Replace(。xml,ArticleGreaterFileName +。xml);
Common.WriteFile(STempArticleFileName,TempArticleStr);
}
TempArticleStr =;
ArticleGreaterFileName = 0;
}
}
btn_CreateArticles.ForeColor = System.Drawing.Color.Red;
MessageBox.Show(Completed Articles,Epub Articles,MessageBoxButtons.OK,MessageBoxIcon.Information);
}
else
{
MessageBox.Show(Book Title Blank,Error Epub Articles,MessageBoxButtons.OK,MessageBoxIcon.Error);
}
}
else
{
MessageBox.Show("Structure File Path Not Correct", "Error Epub Articles", MessageBoxButtons.OK, MessageBoxIcon.Error);
}
}
else
{
MessageBox.Show("Structure File Not Loaded", "Error Epub Articles", MessageBoxButtons.OK, MessageBoxIcon.Error);
}
this.Cursor = Cursors.Default;
}

解决方案

STX 是一个控制字符 [^]。您可以像下面这样把控制字符从字符串中删除:



 var stringWithoutControlCharacters = new string(originalString.Where(c => !char.IsControl(c)).ToArray());


Hello,

I am creating an XML file and it is successfully being created, but in the XML file, I am getting text like this:

Republican promises that the law will be repealed within months



Here, the block after Re is STX. Please refer to the image in the link below:

https://pasteboard.co/HlzdUOR.png

What I have tried:

/// <summary>
/// Click handler for the "create articles" button. Shows the wait cursor,
/// generates the per-article XHTML files from the rows of
/// <c>DataGridView1</c>, and always restores the default cursor afterwards.
/// </summary>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btn_CreateArticles_Click(object sender, EventArgs e)
{
    this.Cursor = Cursors.WaitCursor;
    try
    {
        CreateEpubArticles();
    }
    finally
    {
        // Restore the cursor even when a file or grid operation throws;
        // otherwise the form would be left showing the wait cursor.
        this.Cursor = Cursors.Default;
    }
}

/// <summary>
/// Validates the structure file and book title, prepares the
/// "&lt;structure file name&gt;_Epub\OPS" output folder next to the structure
/// file, and writes the article files. Reports the outcome in a message box.
/// </summary>
private void CreateEpubArticles()
{
    if (string.IsNullOrEmpty(StructureFileName))
    {
        MessageBox.Show("Structure File Not Loaded", "Error Epub Articles", MessageBoxButtons.OK, MessageBoxIcon.Error);
        return;
    }

    if (!System.IO.File.Exists(StructureFileName))
    {
        MessageBox.Show("Structure File Path Not Correct", "Error Epub Articles", MessageBoxButtons.OK, MessageBoxIcon.Error);
        return;
    }

    string epubFolder = System.IO.Path.GetDirectoryName(StructureFileName)
        + "\\" + System.IO.Path.GetFileNameWithoutExtension(StructureFileName) + "_Epub";

    // CreateDirectory creates any missing parent folders and does nothing
    // for folders that already exist, so one call covers both levels.
    System.IO.Directory.CreateDirectory(epubFolder + "\\OPS");

    // The title is placed on a single line inside <title>, so drop line breaks.
    string bookTitle = txt_BookTitle.Text.Replace("\n", "").Replace("\r", "");
    //Use Replacer Function here on StrBookTitle

    if (string.IsNullOrEmpty(bookTitle.Trim(' ')))
    {
        MessageBox.Show("Book Title Blank", "Error Epub Articles", MessageBoxButtons.OK, MessageBoxIcon.Error);
        return;
    }

    if (DataGridView1.RowCount > 0)
    {
        WriteArticleFiles(epubFolder, bookTitle);
    }

    btn_CreateArticles.ForeColor = System.Drawing.Color.Red;
    MessageBox.Show("Completed Articles", "Epub Articles", MessageBoxButtons.OK, MessageBoxIcon.Information);
}

/// <summary>
/// Walks the grid rows and accumulates their text (column 3) into articles.
/// A row whose column 2 is "article-full-headline" starts a new article;
/// an article whose accumulated text exceeds 270 KB (UTF-16) is split into
/// numbered part files.
/// </summary>
/// <param name="epubFolder">Root output folder; files go into its "OPS" subfolder.</param>
/// <param name="bookTitle">Title written into the &lt;title&gt; element of every file.</param>
private void WriteArticleFiles(string epubFolder, string bookTitle)
{
    const double maxBodyKilobytes = 270;

    string baseHead = BuildArticleHead(bookTitle);
    string foot = Environment.NewLine + "</div>" + Environment.NewLine + "</body>" + Environment.NewLine + "</html>";

    string articleHead = "";   // baseHead with "@@@" replaced for the current article
    string body = "";          // text accumulated for the current article/part
    string fileName = "";      // target file of the current article
    int part = 0;              // number of parts already split off the current article

    for (int rowIndex = 0; rowIndex < DataGridView1.RowCount; rowIndex++)
    {
        var cells = DataGridView1.Rows[rowIndex].Cells;
        if (cells[0].Value == null)
        {
            continue;
        }

        if (Encoding.Unicode.GetByteCount(body) / 1024.0 > maxBodyKilobytes)
        {
            // NOTE(review): split parts are written with the unmodified base
            // header rather than the per-article header (no "@@@" replacement)
            // — kept as in the original; confirm this is intended.
            part = part + 1;
            WriteArticle(NumberedFileName(fileName, part), baseHead, body, foot);
            body = "";
        }

        string rowKind = (string)cells[2].Value;
        if (rowKind == "article-full-headline")
        {
            // A headline closes the article collected so far.
            if (!string.IsNullOrEmpty(body))
            {
                WriteLastPart(fileName, part, articleHead, body, foot);
                body = "";
                part = 0;
            }

            fileName = epubFolder + "\\OPS\\article_" + Convert.ToString(cells[0].Value).Trim(' ')
                + "-" + Convert.ToString(cells[1].Value).Trim(' ') + ".xml";

            articleHead = baseHead;
            if ((string)cells[4].Value != "")
            {
                articleHead = articleHead.Replace("@@@", Convert.ToString(cells[4].Value).Trim(' '));
            }

            body = AppendCellText(body, cells[3].Value);
        }
        else if (rowKind == "sectionName")
        {
            continue;
        }
        else if (rowKind == "articleImageCaption")
        {
            if ((string)cells[3].Value != "")
            {
                // Captions fill an "@<col0>_<col10>_caption" placeholder that
                // earlier rows may have put into the body; Replace leaves the
                // body untouched when the placeholder is absent.
                string caption = Convert.ToString(cells[3].Value).Trim(' ');
                string placeholder = "@" + cells[0].Value + "_" + cells[10].Value + "_caption";
                body = body.Replace(placeholder, caption);
            }
        }
        else
        {
            body = AppendCellText(body, cells[3].Value);
        }
    }

    // Flush whatever is left after the last row.
    if (!string.IsNullOrEmpty(body))
    {
        WriteLastPart(fileName, part, articleHead, body, foot);
    }
}

/// <summary>
/// Builds the fixed XHTML prologue (XML declaration, DOCTYPE, head and the
/// opening masthead/body markup) shared by every article file.
/// </summary>
/// <param name="bookTitle">Text placed inside the &lt;title&gt; element.</param>
/// <returns>The header markup, each line terminated by <see cref="Environment.NewLine"/>.</returns>
private static string BuildArticleHead(string bookTitle)
{
    StringBuilder head = new StringBuilder();
    head.AppendLine("<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"no\"?>");
    head.AppendLine("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">");
    head.AppendLine("<html xmlns:saxon=\"http://saxon.sf.net/\" xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:nitf=\"http://www.nytimes.com/applicationdata/xml/nitf-3-3.dtd\">");
    head.AppendLine("<head>");
    head.AppendLine("<title>" + bookTitle + "</title>");
    head.AppendLine("<link rel=\"stylesheet\" href=\"css/TablesAndFloats.css\" type=\"text/css\"/>");
    head.AppendLine("</head>");
    head.AppendLine("<body>");
    head.AppendLine("<div class=\"clean\"/>");
    head.AppendLine("<div id=\"header\" class=\"masthead\">");
    head.AppendLine("<div class=\"masthead-text\">");
    head.AppendLine("<div id=\"header_title\" class=\"masthead-section\">");
    head.AppendLine("</div>");
    head.AppendLine("</div>");
    head.AppendLine("</div>");
    head.AppendLine("<div>");
    return head.ToString();
}

/// <summary>
/// Appends the trimmed text of a grid cell to the article body, separated by
/// a line break when the body already has content. An empty-string cell
/// leaves the body unchanged.
/// </summary>
/// <param name="body">Text accumulated so far.</param>
/// <param name="cellValue">Raw cell value; expected to be a string or null.</param>
/// <returns>The body with the cell text appended.</returns>
private static string AppendCellText(string body, object cellValue)
{
    if ((string)cellValue == "")
    {
        return body;
    }

    string text = Convert.ToString(cellValue).Trim(' ');
    if (string.IsNullOrEmpty(body))
    {
        return body + text;
    }

    return body + Environment.NewLine + text;
}

/// <summary>
/// Writes the final (or only) part of an article: to the plain file name when
/// the article was never split, otherwise to the next numbered part file.
/// </summary>
private void WriteLastPart(string fileName, int partsWritten, string head, string body, string foot)
{
    string target = partsWritten == 0 ? fileName : NumberedFileName(fileName, partsWritten + 1);
    WriteArticle(target, head, body, foot);
}

/// <summary>
/// Wraps the body in header and footer, runs it through
/// <c>EssentialTextReplace</c>, strips characters that XML 1.0 forbids and
/// writes the result with <c>Common.WriteFile</c>.
/// </summary>
private void WriteArticle(string fileName, string head, string body, string foot)
{
    string xml = EssentialTextReplace(head + body + foot).ToString();
    Common.WriteFile(fileName, RemoveInvalidXmlChars(xml));
}

/// <summary>Inserts the part number in front of ".xml" in the file name.</summary>
private static string NumberedFileName(string fileName, int part)
{
    return fileName.Replace(".xml", part + ".xml");
}

/// <summary>
/// Removes characters that are not legal in an XML 1.0 document: the C0
/// control characters other than tab, line feed and carriage return (this
/// includes STX, U+0002) and the non-characters U+FFFE and U+FFFF. Line
/// breaks are kept so the file layout is unchanged.
/// </summary>
/// <param name="text">Text to clean; may be null or empty.</param>
/// <returns>The cleaned text, or the input itself when it is null or empty.</returns>
private static string RemoveInvalidXmlChars(string text)
{
    if (string.IsNullOrEmpty(text))
    {
        return text;
    }

    StringBuilder cleaned = new StringBuilder(text.Length);
    foreach (char c in text)
    {
        bool isAllowed = c == '\t' || c == '\n' || c == '\r'
            || (c >= '\u0020' && c != '\uFFFE' && c != '\uFFFF');
        if (isAllowed)
        {
            cleaned.Append(c);
        }
    }

    return cleaned.ToString();
}

解决方案

STX is a control character[^]. You can remove those from a string like so:

// Builds a copy of originalString that omits every character for which
// char.IsControl returns true.
// NOTE(review): char.IsControl is also true for tab, carriage return and
// line feed, so line breaks are removed as well — confirm that is acceptable
// before applying this to whole XML documents.
var stringWithoutControlCharacters = new string(originalString.Where(c => !char.IsControl(c)).ToArray());


这篇关于如何摆脱XML中的垃圾值?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆