如何拆分超过300kb的xhtml文件 [英] How do I split xhtml files which is more than 300kb

查看:73
本文介绍了如何拆分超过300kb的xhtml文件的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

如果xhtml文件的大小超过300kb,我需要将它们拆分为一个或多个文件,使得生成的文件大小不超过300kb。我正在使用C#和XLinq。最重要的是,必须在所有结果文件中保留良好的格式。我尝试了下面的代码,//方法获取'xhtml'输入文件并在分割后将它们存储在输出位置。代码的问题是,分割文件后格式错误。

下面是代码。

If the size of the xhtml file is more than 300kb, I need to split them to one or more files such that the resultant file size doesn't exceed 300kb. I'm using C# with XLinq. Most importantly, the well-formedness has to be preserved in all the resultant files. I tried the below code, //Method which gets the 'xhtml' input file and stores them in the output location after splitting. The problem with the code is, well-formedness is lost after splitting the files.
Below is the code.

private void XHTMLFileSplitting(string inputfile, string outputloc)
{
    int file_cnt = 0;
    List<string> returnList = new List<string>();
    string split_file = string.Empty;
    string piival_str = Path.GetFileNameWithoutExtension(inputfile);

    if (File.Exists(inputfile))
    {
        FileInfo info = new FileInfo(inputfile);
        double file_size = info.Length / 1024 + 1;

        if (file_size > 290)
        {
            split_file += "<file>";
            string read_html = File.ReadAllText(inputfile, System.Text.Encoding.UTF8);
            string header_txt = "";
            string footer_txt = "</body></html>";
            Match header = Regex.Match(read_html, "<\\?xml( .*?)?>(.*?)<body(.[^<>]*?)?>", RegexOptions.Singleline | RegexOptions.IgnoreCase);

            if (header.Success)
            {
                header_txt = header.Groups[0].ToString();
            }

            string inputFileName =inputfile;
            string outputFileName = outputloc + "\\" + Path.GetFileNameWithoutExtension(inputfile) + "_.xhtml";

            var dec_num = file_size/290;
            decimal dec_no = (decimal)dec_num;
            int numberOfFiles = (int)Math.Ceiling(dec_no);

            //Splitting process

            SplitFile(inputFileName, outputFileName, numberOfFiles, header_txt, footer_txt);

            foreach (string get_split in returnList)
            {
                split_file += "<item>" + Path.GetFileName(get_split) + "</item>";
            }

            split_file += "</file>";

            File.Delete(inputfile);
        }
    }
}

private  SplitFile(string inputFileName, string outputFileName, int numberOfFiles, string header_txt, string footer_txt)
{  
    try 
    {
        string outputFileExtension = Path.GetExtension(outputFileName);
        outputFileName = outputFileName.Replace(outputFileExtension, "");
        StreamReader sr = new StreamReader(inputFileName);
        long fileLength = sr.BaseStream.Length;
        int baseBufferSize = Convert.ToInt32(fileLength / numberOfFiles);
        bool finished = false;
        int fileCount = 1;
        string tmp_last = "";

        while (!(finished)) 
        {
            int bufferSize = baseBufferSize;
            long originalPosition = sr.BaseStream.Position;
            sr.BaseStream.Position += bufferSize;

            if (sr.BaseStream.Position < fileLength)
            {
                while (!(sr.Read() == 10 | sr.Read() == -1))
                {
                    bufferSize += 1;
                }
                bufferSize += 1;
            } 
            else
            {
                bufferSize = Convert.ToInt32(fileLength - originalPosition);
                finished = true;
            }

            sr.BaseStream.Position = originalPosition;
            byte[] buffer = new byte[bufferSize];
            sr.BaseStream.Read(buffer, 0, bufferSize);
            string outputPath = outputFileName + fileCount.ToString() + outputFileExtension;

            returnList.Add(outputPath);
            Computer Mycomp = new Computer();
            Mycomp.FileSystem.WriteAllBytes(outputPath, buffer,true);

            string[] read_line = File.ReadAllLines(outputPath);
            string last_line = "";


            foreach (string last_line_loopVariable in read_line) 
            {
                last_line = last_line + last_line_loopVariable;
            }

            string read_out = File.ReadAllText(outputPath, System.Text.Encoding.Default);

            if (fileCount == 1)
            {
                read_out = Regex.Replace(read_out, Regex.Escape(last_line) + "\\s*$", "");
                read_out += footer_txt;
            }
            else if (fileCount == numberOfFiles)
            {
                read_out = header_txt + tmp_last + read_out;
            }
            else 
            {
                read_out = Regex.Replace(read_out, Regex.Escape(last_line) + "\\s*$", "");
                read_out = header_txt + tmp_last + read_out + footer_txt;
            }

            tmp_last = last_line;
            //read_out = Regex.Replace(read_out, "</body>", " </body>", RegexOptions.Singleline)
            File.WriteAllText(outputPath, read_out);
            fileCount += 1;
        }

        sr.Close();
    }
    catch (Exception ex) 
    {
        throw ex;
    }
}

推荐答案

);
read_out + = footer_txt;
}
else if (fileCount == numberOfFiles)
{
read_out = header_txt + tmp_last + read_out;
}
else
{
read_out = Regex.Replace(read_out,Regex.Escape(last_line)+ \\\\ *
", ""); read_out += footer_txt; } else if (fileCount == numberOfFiles) { read_out = header_txt + tmp_last + read_out; } else { read_out = Regex.Replace(read_out, Regex.Escape(last_line) + "\\s*


);
read_out = header_txt + tmp_last + read_out + footer_txt;
}

tmp_last = last_line;
// read_out = Regex.Replace(read_out,?< / body>,< / body>,RegexOptions.Singleline)
File.WriteAllText(outputPath,read_out) ;
fileCount + = 1 ;
}

sr.Close();
}
catch (例外情况)
{
throw ex;
}
}
", ""); read_out = header_txt + tmp_last + read_out + footer_txt; } tmp_last = last_line; //read_out = Regex.Replace(read_out, "</body>", " </body>", RegexOptions.Singleline) File.WriteAllText(outputPath, read_out); fileCount += 1; } sr.Close(); } catch (Exception ex) { throw ex; } }


这篇关于如何拆分超过300kb的xhtml文件的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
相关文章
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆