如何使用iTextSharp在C#中查找PDF表并将其提取到excel文件 [英] How to find and extract PDF table to excel file in C# using iTextSharp
问题描述
任何人都可以帮我提取PDF表格并以适当的格式输入excel文件。我正在使用iTextSharp dll。
这是经常被问到的问题。请参阅: SearchBox [ ^ ]
命名空间
using System.Text;
using System.IO;
using iTextSharp.text;
using iTextSharp.text.pdf;
using iTextSharp.text.pdf.parser;
代码
/// <summary>
/// Event handler that converts the uploaded PDF into a downloadable .xls response.
/// </summary>
/// <param name="sender">Object that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void ExportToExcel(object sender, EventArgs e)
{
    // Nothing was uploaded, so there is nothing to convert.
    if (!this.fuPdfUpload.HasFile)
    {
        return;
    }

    // PostedFile.FileName is the file's name on the client machine; running it through
    // Path.GetFullPath does not produce a file that exists on the server. Persist the
    // uploaded content to a server-side temporary file and pass that path on instead.
    string tempFile = Path.GetTempFileName();
    try
    {
        this.fuPdfUpload.PostedFile.SaveAs(tempFile);
        this.ExportPDFToExcel(tempFile);
    }
    finally
    {
        try
        {
            File.Delete(tempFile);
        }
        catch (IOException)
        {
            // Best-effort cleanup: a leftover temp file must not mask the real outcome.
        }
    }
}
/// <summary>
/// Extracts the text of every page of a PDF and writes it to the HTTP response
/// as an attachment named ReceiptExport.xls.
/// </summary>
/// <param name="fileName">Path of the PDF file to read.</param>
private void ExportPDFToExcel(string fileName)
{
    StringBuilder text = new StringBuilder();
    PdfReader pdfReader = new PdfReader(fileName);
    try
    {
        for (int page = 1; page <= pdfReader.NumberOfPages; page++)
        {
            ITextExtractionStrategy strategy = new LocationTextExtractionStrategy();

            // The extracted text is already a .NET string. Re-encoding it through
            // Encoding.Default would mangle non-ASCII characters, so it is appended as-is.
            // AppendLine keeps the last line of one page from fusing with the first
            // line of the next.
            text.AppendLine(PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy));
        }
    }
    finally
    {
        // Close once, after all pages are read (and also when extraction throws);
        // closing inside the loop would leave the reader unusable for later pages.
        pdfReader.Close();
    }

    Response.Clear();
    Response.Buffer = true;
    Response.AddHeader("content-disposition", "attachment;filename=ReceiptExport.xls");
    Response.Charset = "";
    Response.ContentType = "application/vnd.ms-excel";
    Response.Write(text.ToString());
    Response.Flush();
    Response.End();
}
Can anybody please help me extract a PDF table and put it into an Excel file in the proper format? I am using the iTextSharp DLL.
This question is asked quite often. Please see: SearchBox[^]
Namespaces
using System.Text;
using System.IO;
using iTextSharp.text;
using iTextSharp.text.pdf;
using iTextSharp.text.pdf.parser;
Code
/// <summary>
/// Event handler that converts the uploaded PDF into a downloadable .xls response.
/// </summary>
/// <param name="sender">Object that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void ExportToExcel(object sender, EventArgs e)
{
    // Nothing was uploaded, so there is nothing to convert.
    if (!this.fuPdfUpload.HasFile)
    {
        return;
    }

    // PostedFile.FileName is the file's name on the client machine; running it through
    // Path.GetFullPath does not produce a file that exists on the server. Persist the
    // uploaded content to a server-side temporary file and pass that path on instead.
    string tempFile = Path.GetTempFileName();
    try
    {
        this.fuPdfUpload.PostedFile.SaveAs(tempFile);
        this.ExportPDFToExcel(tempFile);
    }
    finally
    {
        try
        {
            File.Delete(tempFile);
        }
        catch (IOException)
        {
            // Best-effort cleanup: a leftover temp file must not mask the real outcome.
        }
    }
}
/// <summary>
/// Extracts the text of every page of a PDF and writes it to the HTTP response
/// as an attachment named ReceiptExport.xls.
/// </summary>
/// <param name="fileName">Path of the PDF file to read.</param>
private void ExportPDFToExcel(string fileName)
{
    StringBuilder text = new StringBuilder();
    PdfReader pdfReader = new PdfReader(fileName);
    try
    {
        for (int page = 1; page <= pdfReader.NumberOfPages; page++)
        {
            ITextExtractionStrategy strategy = new LocationTextExtractionStrategy();

            // The extracted text is already a .NET string. Re-encoding it through
            // Encoding.Default would mangle non-ASCII characters, so it is appended as-is.
            // AppendLine keeps the last line of one page from fusing with the first
            // line of the next.
            text.AppendLine(PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy));
        }
    }
    finally
    {
        // Close once, after all pages are read (and also when extraction throws);
        // closing inside the loop would leave the reader unusable for later pages.
        pdfReader.Close();
    }

    Response.Clear();
    Response.Buffer = true;
    Response.AddHeader("content-disposition", "attachment;filename=ReceiptExport.xls");
    Response.Charset = "";
    Response.ContentType = "application/vnd.ms-excel";
    Response.Write(text.ToString());
    Response.Flush();
    Response.End();
}
这篇关于如何使用iTextSharp在C#中查找PDF表并将其提取到excel文件的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!