如何拆分超过300kb的xhtml文件 [英] How do I split xhtml files which is more than 300kb
本文介绍了如何拆分超过300kb的xhtml文件的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!
问题描述
如果xhtml文件的大小超过300kb,我需要将它们拆分为一个或多个文件,使得生成的文件大小不超过300kb。我正在使用C#和XLinq。最重要的是,必须在所有结果文件中保留良好的格式。我尝试了下面的代码,//方法获取'xhtml'输入文件并在分割后将它们存储在输出位置。代码的问题是,分割文件后格式错误。
下面是代码。
If the size of the xhtml file is more than 300kb, I need to split them to one or more files such that the resultant file size doesn't exceed 300kb. I'm using C# with XLinq. Most importantly, the well-formedness has to be preserved in all the resultant files. I tried the below code, //Method which gets the 'xhtml' input file and stores them in the output location after splitting. The problem with the code is, well-formedness is lost after splitting the files.
Below is the code.
private void XHTMLFileSplitting(string inputfile, string outputloc)
{
int file_cnt = 0;
List<string> returnList = new List<string>();
string split_file = string.Empty;
string piival_str = Path.GetFileNameWithoutExtension(inputfile);
if (File.Exists(inputfile))
{
FileInfo info = new FileInfo(inputfile);
double file_size = info.Length / 1024 + 1;
if (file_size > 290)
{
split_file += "<file>";
string read_html = File.ReadAllText(inputfile, System.Text.Encoding.UTF8);
string header_txt = "";
string footer_txt = "</body></html>";
Match header = Regex.Match(read_html, "<\\?xml( .*?)?>(.*?)<body(.[^<>]*?)?>", RegexOptions.Singleline | RegexOptions.IgnoreCase);
if (header.Success)
{
header_txt = header.Groups[0].ToString();
}
string inputFileName =inputfile;
string outputFileName = outputloc + "\\" + Path.GetFileNameWithoutExtension(inputfile) + "_.xhtml";
var dec_num = file_size/290;
decimal dec_no = (decimal)dec_num;
int numberOfFiles = (int)Math.Ceiling(dec_no);
//Splitting process
SplitFile(inputFileName, outputFileName, numberOfFiles, header_txt, footer_txt);
foreach (string get_split in returnList)
{
split_file += "<item>" + Path.GetFileName(get_split) + "</item>";
}
split_file += "</file>";
File.Delete(inputfile);
}
}
}
private SplitFile(string inputFileName, string outputFileName, int numberOfFiles, string header_txt, string footer_txt)
{
try
{
string outputFileExtension = Path.GetExtension(outputFileName);
outputFileName = outputFileName.Replace(outputFileExtension, "");
StreamReader sr = new StreamReader(inputFileName);
long fileLength = sr.BaseStream.Length;
int baseBufferSize = Convert.ToInt32(fileLength / numberOfFiles);
bool finished = false;
int fileCount = 1;
string tmp_last = "";
while (!(finished))
{
int bufferSize = baseBufferSize;
long originalPosition = sr.BaseStream.Position;
sr.BaseStream.Position += bufferSize;
if (sr.BaseStream.Position < fileLength)
{
while (!(sr.Read() == 10 | sr.Read() == -1))
{
bufferSize += 1;
}
bufferSize += 1;
}
else
{
bufferSize = Convert.ToInt32(fileLength - originalPosition);
finished = true;
}
sr.BaseStream.Position = originalPosition;
byte[] buffer = new byte[bufferSize];
sr.BaseStream.Read(buffer, 0, bufferSize);
string outputPath = outputFileName + fileCount.ToString() + outputFileExtension;
returnList.Add(outputPath);
Computer Mycomp = new Computer();
Mycomp.FileSystem.WriteAllBytes(outputPath, buffer,true);
string[] read_line = File.ReadAllLines(outputPath);
string last_line = "";
foreach (string last_line_loopVariable in read_line)
{
last_line = last_line + last_line_loopVariable;
}
string read_out = File.ReadAllText(outputPath, System.Text.Encoding.Default);
if (fileCount == 1)
{
read_out = Regex.Replace(read_out, Regex.Escape(last_line) + "\\s*$", "");
read_out += footer_txt;
}
else if (fileCount == numberOfFiles)
{
read_out = header_txt + tmp_last + read_out;
}
else
{
read_out = Regex.Replace(read_out, Regex.Escape(last_line) + "\\s*$", "");
read_out = header_txt + tmp_last + read_out + footer_txt;
}
tmp_last = last_line;
//read_out = Regex.Replace(read_out, "</body>", " </body>", RegexOptions.Singleline)
File.WriteAllText(outputPath, read_out);
fileCount += 1;
}
sr.Close();
}
catch (Exception ex)
{
throw ex;
}
}
推荐答案
, );
read_out + = footer_txt;
}
else if (fileCount == numberOfFiles)
{
read_out = header_txt + tmp_last + read_out;
}
else
{
read_out = Regex.Replace(read_out,Regex.Escape(last_line)+ \\\\ *
", ""); read_out += footer_txt; } else if (fileCount == numberOfFiles) { read_out = header_txt + tmp_last + read_out; } else { read_out = Regex.Replace(read_out, Regex.Escape(last_line) + "\\s*
, );
read_out = header_txt + tmp_last + read_out + footer_txt;
}
tmp_last = last_line;
// read_out = Regex.Replace(read_out,?< / body>,< / body>,RegexOptions.Singleline)
File.WriteAllText(outputPath,read_out) ;
fileCount + = 1 ;
}
sr.Close();
}
catch (例外情况)
{
throw ex;
}
}
", ""); read_out = header_txt + tmp_last + read_out + footer_txt; } tmp_last = last_line; //read_out = Regex.Replace(read_out, "</body>", " </body>", RegexOptions.Singleline) File.WriteAllText(outputPath, read_out); fileCount += 1; } sr.Close(); } catch (Exception ex) { throw ex; } }
这篇关于如何拆分超过300kb的xhtml文件的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!
查看全文