如何在合并时删除空白 [英] How To Remove Whitespace on Merge
问题描述
我有一些代码,可以接收 3 个不同的 PDF 字节数组并将它们合并。这段代码运行良好。(部分用户)遇到的问题是:每个 PDF 都被视为一整页(打印时),即使其中只有比如 4 英寸的内容,也会在垂直方向留下 7 英寸的空白。接着放入中间的文档,其末尾可能有也可能没有垂直空白。最后页脚同样被放在单独的一页上。
下面是code:
字节[]字节= rv.LocalReport.Render(PDF,空,出Mime类型,出编码,扩展出去,出去StreamIDs,出警告);
清单<字节[]> MergeSets = //这是此code之前填写//附加任何其他网页此主信
如果(MergeSets.Count大于0){
MemoryStream的毫秒=新的MemoryStream();
文档的文档=新的文件();
PdfCopy副本=新PdfCopy(文件,MS);
document.Open();
PdfImportedPage页面;
PdfReader读卡器=新PdfReader(字节); //读取生成的主信
诠释页= reader.NumberOfPages; 的for(int i = 0; I<页面;){
页= copy.GetImportedPage(读者,我++);
copy.AddPage(页);
} //在求职信的页面的foreach //现在追加合并套
的foreach(在MergeSets的byte [] BA){
读者=新PdfReader(BA);
页= reader.NumberOfPages; 的for(int i = 0; I<页面;){
页= copy.GetImportedPage(读者,我++);
copy.AddPage(页);
} //在当前合并集的页面的foreach
} //该组数据的的foreach document.Close(); ServerSaved = SaveGeneratedLetter(ms.GetBuffer(),DateTime.Now.Year,hl.LetterName,SAVENAME);
} //如果有任何合并
在合并每一页时,有没有办法裁剪/移除/清除每个 PDF 末尾的垂直空白,使其看起来像一份无缝衔接的文档?
更新:
下面是我试图合并一些样本.pdf文件。
块引用>
更新2:使用应答:
我已经转换@ MKL的code到C#和在这里。
工具类:
块引用>公共类PdfVeryDenseMergeTool { 私人矩形每页;
私人浮动TOPMARGIN;
私人浮动BottomMargin;
私人浮动差距;
私人文件文档= NULL;
私人PdfWriter作家=无效;
私人浮动YPosition = 0; 公共PdfVeryDenseMergeTool(矩形的大小,浮顶,浮底,浮动间隙){
this.PageSize =大小;
this.TopMargin =顶部;
this.BottomMargin =底部;
this.Gap =差距;
} // PdfVeryDenseMergeTool 公共无效合并(MemoryStream的OutputStream中,列表与LT; PdfReader>输入){
尝试{
this.OpenDocument(OutputStream的); 的foreach(在输入PdfReader读者){
this.Merge(读卡器);
} // PDF文件的的foreach合并
} {最后
this.CloseDocument();
} //的try-catch-最后
} //合并 公共无效OpenDocument格式(MemoryStream的OutputStream中){
this.Document =新的文件(每页,36,36,this.TopMargin,this.BottomMargin);
this.Writer = PdfWriter.GetInstance(文档,OutputStream的); this.Document.Open();
this.NewPage();
} // OpenDocument格式 公共无效CloseDocument(){
尝试{
this.Document.Close();
} {最后
this.Document = NULL;
this.Writer = NULL;
this.YPosition = 0;
} //尝试,终于
} // CloseDocument 公共无效NewPage公司(){
this.Document.NewPage();
this.YPosition = PageSize.GetTop(this.TopMargin);
} //合并 公共无效合并(PdfReader读者){
PdfReaderContentParser分析器=新PdfReaderContentParser(读卡器); 对于(INT的PageIndex = 1;的PageIndex< = reader.NumberOfPages;的PageIndex ++){
this.Merge(阅读器,分析器,PageIndex的);
} //当前PDF的页面的foreach
} //合并 公共无效合并(PdfReader读卡器,PdfReaderContentParser解析器,诠释的PageIndex){
PdfImportedPage importedPage = Writer.GetImportedPage(读者的PageIndex);
PdfContentByte directContent = Writer.DirectContent; PageVerticalAnalyzer取景器= parser.ProcessContent(PageIndex的,新PageVerticalAnalyzer()); 如果(finder.VerticalFlips.Count 2)
返回; 矩形pageSizeToImport = reader.GetPageSize(PageIndex的); INT startFlip = finder.VerticalFlips.Count - 1;
布尔第一= TRUE; 而(startFlip大于0){
如果(!第一)
this.NewPage(); 浮可用空间= this.YPosition - PageSize.GetBottom(BottomMargin);
INT endFlip = startFlip + 1; 而((endFlip→1)及及(finder.VerticalFlips [startFlip] - finder.VerticalFlips [endFlip - 2]所述,可用空间))
endFlip - = 2; 如果(endFlip< startFlip){
浮动高度= finder.VerticalFlips [startFlip] - finder.VerticalFlips [endFlip] directContent.SaveState();
directContent.Rectangle(0,this.YPosition - 高度,pageSizeToImport.Width,高度);
directContent.Clip();
directContent.NewPath(); this.Writer.DirectContent.AddTemplate(importedPage,0,this.YPosition - (finder.VerticalFlips [startFlip] - pageSizeToImport.Bottom)); directContent.RestoreState();
this.YPosition - =身高+ this.Gap;
startFlip = endFlip - 1;
}否则如果(!第一){
抛出新的ArgumentException(的String.Format(第{0}的内容太大,的PageIndex));
} //如果 第一= FALSE;
} //而
} //合并
} // PdfVeryDenseMergeTool
该RenderListener类:
更新3:固定1号线code和它的作品:见code注释
块引用>公共类PageVerticalAnalyzer:IRenderListener { 公共PageVerticalAnalyzer(){} 公开名单<浮动> VerticalFlips =新的List<浮动>(); 公共无效AddVerticalUseSection(从浮动,浮动){
如果(至<从){
浮TEMP =来;
为从=;
从=气温;
} INT I = 0;
INT J = 0; 对于(i = 0; I< VerticalFlips.Count;我++){
浮翻转= VerticalFlips [I]
如果(翻转<从)
继续; 为(J = I; J< VerticalFlips.Count; J ++){
翻转= VerticalFlips [J]。
如果(翻转<到)
继续;
打破;
}
打破;
} //垂直翻转的的foreach 布尔fromOutsideInterval = I%2 == 0;
布尔toOutsideInterval = j的2%== 0; 而(j-- I标记)
VerticalFlips.RemoveAt(J); //这是问题符合刚卸下摆臂(J)
如果(toOutsideInterval)
VerticalFlips.Insert(i到);
如果(fromOutsideInterval)
VerticalFlips.Insert(ⅰ,从);
} // AddVerticalUseSection 公共无效BeginTextBlock(){/ *什么也不做* /} 公共无效EndTextBlock(){/ *什么也不做* /} 公共无效RenderImage(ImageRenderInfo renderInfo){
矩阵CTM = renderInfo.GetImageCTM();
清单<浮动> YCoords =新的List<浮动>(4){0,0,0,0}; 为(中间体X = 0; X 2; X ++){
为(中间体Y = 0; Y 2; Y ++){
矢量角落=新的向量(X,Y,1).Cross(CTM)。
YCoords [2 * X + Y =角球[Vector.I2]
}
} YCoords.Sort();
AddVerticalUseSection(YCoords [0],YCoords [3]);
} // RenderImage 公共无效RenderText(TextRenderInfo renderInfo){
线段ascentLine = renderInfo.GetAscentLine();
线段descentLine = renderInfo.GetDescentLine();
清单<浮动> YCoords =新的List<浮动>(四){
ascentLine.GetStartPoint()[Vector.I2]
ascentLine.GetEndPoint()[Vector.I2]
descentLine.GetStartPoint()[Vector.I2]
descentLine.GetEndPoint()[Vector.I2]
}; YCoords.Sort();
AddVerticalUseSection(YCoords [0],YCoords [3]);
} // RenderText
} // PageVericalAnalyzer
code,收集文件,并运行此工具:
块引用>公共无效TestMergeDocuments(){
PdfVeryDenseMergeTool工具=新PdfVeryDenseMergeTool(iTextSharp.text.PageSize.A4,18,18,10);
清单<字节[]>文件=新的List<字节[]>(); // code来装载每个3个文件,我需要这个字节数组列表 使用(MemoryStream的毫秒=新的MemoryStream()){
清单< PdfReader>文件=新的List< PdfReader>(); 的foreach(在文件中的byte [] BA){
files.Add(新PdfReader(BA));
} //该组数据的的foreach tool.Merge(MS,文件); //保存使用文件:ms.GetBuffer()
} //使用内存流
} // TestMergeDocuments
解决方案的下面的示例工具一直沿着工具的想法实施
PdfDenseMergeTool
从<一个HREF =http://stackoverflow.com/a/28024276/1729265>其中OP曾评论这个答案是的如此接近到什么 [他] 的必要性的秒。就像PdfDenseMergeTool
这个工具在这里是用Java / iText的实施而我更在家里比C#/ iTextSharp的。由于OP 已经翻译PdfDenseMergeTool
到C#/ iTextSharp的,翻译这个工具在这里也应该不是太大的问题。的PdfVeryDenseMergeTool
同样这个工具来
PdfDenseMergeTool
需要的页面内容从一些PdfReader
实例的网页,并尝试密集合并它们,即把多个来源的网页内容到一个单一的目标页面是否有足够的可用空间来这样做。相较于更早的工具,这个工具甚至分裂源页面的内容,以便更密集的合并。就像其他工具
PdfVeryDenseMergeTool
不考虑矢量图形考虑,因为iText的(夏普)解析API确实只能向前文字和位图图像的
PdfVeryDenseMergeTool
拆分源的网页其在未通过文字字形或位图图形的边框相交的水平线不完全贴合到目标页面。工具类:
公共类PdfVeryDenseMergeTool
{
公共PdfVeryDenseMergeTool(矩形的大小,浮顶,浮底,浮动间隙)
{
this.pageSize =大小;
this.topMargin =顶部;
this.bottomMargin =底部;
this.gap =差距;
} 公共无效合并(OutputStream的OutputStream的,可迭代&LT; PdfReader&GT;输入)抛出DocumentException,IOException异常
{
尝试
{
使用openDocument(OutputStream的);
对于(PdfReader读者:输入)
{
合并(读卡器);
}
}
最后
{
closeDocument();
}
} 无效使用openDocument(OutputStream的OutputStream中)抛出DocumentException
{
最终文档文件=新的文件(的pageSize,36,36,TOPMARGIN,bottomMargin);
最后PdfWriter作家= PdfWriter.getInstance(文件,OutputStream的);
document.open();
this.document =文件;
this.writer =作家;
NEWPAGE();
} 无效closeDocument()
{
尝试
{
document.close();
}
最后
{
this.document = NULL;
this.writer = NULL;
this.yPosition = 0;
}
} 新页无效()
{
document.newPage();
yPosition = pageSize.getTop(TOPMARGIN);
} 无效合并(PdfReader阅读器)抛出IOException异常
{
PdfReaderContentParser分析器=新PdfReaderContentParser(读卡器);
对于(INT页= 1;页&LT; = reader.getNumberOfPages();页++)
{
合并(阅读器,分析器,页);
}
} 无效合并(PdfReader读卡器,PdfReaderContentParser解析器,诠释页)抛出IOException异常
{
PdfImportedPage importedPage = writer.getImportedPage(读卡器,页);
PdfContentByte directContent = writer.getDirectContent(); PageVerticalAnalyzer取景= parser.processContent(页面,新PageVerticalAnalyzer());
如果(finder.verticalFlips.size()2)
返回;
矩形pageSizeToImport = reader.getPageSize(页); INT startFlip = finder.verticalFlips.size() - 1;
布尔第一= TRUE;
而(startFlip大于0)
{
如果(!第一)
NEWPAGE(); 浮可用空间= yPosition - pageSize.getBottom(bottomMargin);
INT endFlip = startFlip + 1;
而((endFlip→1)及及(finder.verticalFlips.get(startFlip) - finder.verticalFlips.get(endFlip - 2)&下; FREESPACE))
endFlip - = 2;
如果(endFlip&LT; startFlip)
{
浮球高度= finder.verticalFlips.get(startFlip) - finder.verticalFlips.get(endFlip); directContent.saveState();
directContent.rectangle(0,yPosition - 高度,pageSizeToImport.getWidth(),高度);
directContent.clip();
directContent.newPath(); 。writer.getDirectContent()addTemplate(importedPage,0,yPosition - (finder.verticalFlips.get(startFlip) - pageSizeToImport.getBottom())); directContent.restoreState();
yPosition - =身高+差距;
startFlip = endFlip - 1;
}
否则,如果(!第一)
抛出新抛出:IllegalArgumentException(的String.format(页%S含量的部分过大,页));
第一= FALSE;
}
} 文献文件= NULL;
PdfWriter作家=无效;
浮yPosition = 0; 最后矩形的pageSize;
最终浮动TOPMARGIN;
最终浮动bottomMargin;
最终浮动差距;
}的(<一个href=\"https://github.com/mkl-public/testarea-itext5/blob/master/src/main/java/mkl/testarea/itext5/merge/PdfVeryDenseMergeTool.java\">PdfVeryDenseMergeTool.java)
块引用>此工具可使用自定义的
RenderListener
用于与iText的解析器API使用:公共类PageVerticalAnalyzer实现RenderListener
{
@覆盖
公共无效beginTextBlock(){}
@覆盖
公共无效endTextBlock(){} / *
* @see RenderListener#renderText(TextRenderInfo)
* /
@覆盖
公共无效renderText(TextRenderInfo renderInfo)
{
线段ascentLine = renderInfo.getAscentLine();
线段descentLine = renderInfo.getDescentLine();
浮动[] = yCoords新的浮动[] {
ascentLine.getStartPoint()获得(Vector.I2)
ascentLine.getEndPoint()获得(Vector.I2)
descentLine.getStartPoint()获得(Vector.I2)
descentLine.getEndPoint()获得(Vector.I2)
};
Arrays.sort(yCoords);
addVerticalUseSection(yCoords [0],yCoords [3]);
} / *
* @see RenderListener#renderImage(ImageRenderInfo)
* /
@覆盖
公共无效renderImage(ImageRenderInfo renderInfo)
{
矩阵CTM = renderInfo.getImageCTM();
浮动[] = yCoords新的浮动[4];
为(中间体X = 0; X 2; X ++)
为(中间体Y = 0; Y 2; Y +)
{
矢量角落=新的向量(X,Y,1).cross(CTM)。
yCoords [2 * X + Y = corner.get(Vector.I2);
}
Arrays.sort(yCoords);
addVerticalUseSection(yCoords [0],yCoords [3]);
} / **
*由于使用该方法标志着给定区间。
* /
无效addVerticalUseSection(从浮动,浮动)
{
如果(至&lt;从)
{
浮TEMP =来;
为从=;
从=气温;
} INT I = 0,J = 0;
对于(; I&LT; verticalFlips.size();我++)
{
浮翻转= verticalFlips.get(I)
如果(翻转&LT;从)
继续; 为(J = I; J&LT; verticalFlips.size(); J ++)
{
翻转= verticalFlips.get(J);
如果(翻转&LT;到)
继续;
打破;
}
打破;
}
布尔fromOutsideInterval = I%2 == 0;
布尔toOutsideInterval = j的2%== 0; 而(j-- I标记)
verticalFlips.remove(J);
如果(toOutsideInterval)
verticalFlips.add(i到);
如果(fromOutsideInterval)
verticalFlips.add(ⅰ,从);
} 最终名单&LT;浮球GT; verticalFlips =新的ArrayList&LT;浮球GT;();
}的(<一个href=\"https://github.com/mkl-public/testarea-itext5/blob/master/src/main/java/mkl/testarea/itext5/merge/PageVerticalAnalyzer.java\">PageVerticalAnalyzer.java)
块引用>它用于这样的:
PdfVeryDenseMergeTool tool = new PdfVeryDenseMergeTool(PageSize.A4, 18, 18, 5);
tool.merge(输出,输入);的(<一个href=\"https://github.com/mkl-public/testarea-itext5/blob/master/src/test/java/mkl/testarea/itext5/merge/VeryDenseMerging.java\">VeryDenseMerging.java)
块引用>应用到OP的样本文件
的 Header.pdf 的
的 Body.pdf 的
的 Footer.pdf 的
它生成
如果将目标文档的页面大小定义为 A5 横向:
PdfVeryDenseMergeTool tool = new PdfVeryDenseMergeTool(new RectangleReadOnly(595, 421), 18, 18, 5);
tool.merge(输出,输入);的(<一个href=\"https://github.com/mkl-public/testarea-itext5/blob/master/src/test/java/mkl/testarea/itext5/merge/VeryDenseMerging.java\">VeryDenseMerging.java)
块引用>它生成的:
当心!这只是一个概念验证,并未考虑所有可能的情况。例如,源页面或目标页面带有非平凡 Rotate(旋转)值的情况就没有得到妥善处理。因此,它目前还不能用于生产环境。
改善电流(5.5.6快照)iText的版本
5.5.6对目前iText的开发版本增强了解析器的功能也信号矢量图形。因此,我延长了
PageVerticalAnalyzer
来利用这一点:公共类PageVerticalAnalyzer实现ExtRenderListener
{
@覆盖
公共无效beginTextBlock(){}
@覆盖
公共无效endTextBlock(){}
@覆盖
公共无效clipPath(INT规则){}
...
静态类SubPathSection
{
公共SubPathSection(浮点X,浮法Y,矩阵m)
{
浮effectiveY = getTransformedY(X,Y,M);
pathFromY = effectiveY;
pathToY = effectiveY;
} 无效extendTo(浮法X,浮法Y,矩阵m)
{
浮effectiveY = getTransformedY(X,Y,M);
如果(effectiveY&LT; pathFromY)
pathFromY = effectiveY;
否则,如果(effectiveY&GT; pathToY)
pathToY = effectiveY;
} 浮getTransformedY(浮法X,浮法Y,矩阵m)
{
返回新的Vector(X,Y,1).cross(M)获得(Vector.I2);
} 浮动getFromY()
{
返回pathFromY;
} 浮动getToY()
{
返回pathToY;
} 私人浮动pathFromY;
私人浮动pathToY;
} / *
*请注意:执行是不正确的,因为它包含曲线的控制点
*它可以是远远超出实际曲线。
*
* @see ExtRenderListener#modifyPath(PathConstructionRenderInfo)
* /
@覆盖
公共无效modifyPath(PathConstructionRenderInfo renderInfo)
{
矩阵CTM = renderInfo.getCtm();
清单&LT;浮球GT; segmentData = renderInfo.getSegmentData(); 开关(renderInfo.getOperation())
{
案例PathConstructionRenderInfo.MOVETO:
子路径= NULL;
案例PathConstructionRenderInfo.LINETO:
案例PathConstructionRenderInfo.CURVE_123:
案例PathConstructionRenderInfo.CURVE_13:
案例PathConstructionRenderInfo.CURVE_23:
的for(int i = 0; I&LT; segmentData.size() - 1; I + = 2)
{
如果(子路径== NULL)
{
子路径=新SubPathSection(segmentData.get(ⅰ),segmentData.get第(i + 1),CTM);
path.add(子路径);
}
其他
subPath.extendTo(segmentData.get(ⅰ),segmentData.get第(i + 1),CTM);
}
打破;
案例PathConstructionRenderInfo.RECT:
浮X = segmentData.get(0);
浮Y = segmentData.get(1);
浮W = segmentData.get(2);
浮动H = segmentData.get(3);
SubPathSection节=新SubPathSection(X,Y,CTM)。
section.extendTo(X + W,Y,CTM)。
section.extendTo(X,Y + H,CTM)。
section.extendTo(X + W,Y + H,CTM)。
path.add(部分);
案例PathConstructionRenderInfo.CLOSE:
子路径= NULL;
打破;
默认:
}
} / *
* @see ExtRenderListener#renderPath(PathPaintingRenderInfo)
* /
@覆盖
公共路径renderPath(PathPaintingRenderInfo renderInfo)
{
如果(renderInfo.getOperation()!= PathPaintingRenderInfo.NO_OP)
{
对于(SubPathSection部分:路径)
addVerticalUseSection(section.getFromY(),section.getToY());
} path.clear();
子路径= NULL;
返回null;
} 清单&LT; SubPathSection&GT;路径=新的ArrayList&LT; SubPathSection&GT;();
SubPathSection子路径= NULL;
...
}的(<一个href=\"https://github.com/mkl-public/testarea-itext5/blob/master/src/main/java/mkl/testarea/itext5/merge/PageVerticalAnalyzer.java\">PageVerticalAnalyzer.java)
块引用>一个简单的测试(<一个href=\"https://github.com/mkl-public/testarea-itext5/blob/master/src/test/java/mkl/testarea/itext5/merge/VeryDenseMerging.java\">VeryDenseMerging.java方法
testMergeOnlyGraphics
)合并这些文件
这个:
但再次的请注意:的这个概念仅仅是证明。尤其是
modifyPath()
有待改进:该实现并不正确,因为它把曲线的控制点也计算在内,而控制点可能远在实际曲线之外。
I have some code that takes 3 different PDF byte arrays and merges them. This code works great. The issue some people are having is that each PDF is considered to be a full page (if printed) even if there are only, say, 4 inches of content on it, thus leaving 7 inches of white space vertically. Then the middle document gets put in and may or may not have vertical white space at the end of it. Then the footer gets put on its own page as well.
Here is the code:
// Render the primary letter to PDF bytes.
byte[] Bytes = rv.LocalReport.Render("PDF", null, out MimeType, out Encoding, out Extension, out StreamIDs, out Warnings);
List<byte[]> MergeSets = // This is filled prior to this code

// Append any other pages to this primary letter
if (MergeSets.Count > 0)
{
    MemoryStream ms = new MemoryStream();
    Document document = new Document();
    PdfCopy copy = new PdfCopy(document, ms);
    document.Open();

    PdfImportedPage page;
    PdfReader reader = new PdfReader(Bytes); // read the generated primary Letter
    int pages = reader.NumberOfPages;

    // Page numbers passed to GetImportedPage start at 1, hence the pre-increment.
    for (int i = 0; i < pages; )
    {
        page = copy.GetImportedPage(reader, ++i);
        copy.AddPage(page);
    } // foreach of the pages in the Cover Letter

    // Now append the merge sets
    foreach (byte[] ba in MergeSets)
    {
        reader = new PdfReader(ba);
        pages = reader.NumberOfPages;

        for (int i = 0; i < pages; )
        {
            page = copy.GetImportedPage(reader, ++i);
            copy.AddPage(page);
        } // foreach of the pages in the current merge set
    } // foreach of the sets of data

    document.Close();

    // ToArray() returns exactly the bytes written. GetBuffer() would return the whole
    // internal buffer, including unused capacity, and so append garbage after the PDF.
    ServerSaved = SaveGeneratedLetter(ms.ToArray(), DateTime.Now.Year, hl.LetterName, SaveName);
} // if there is anything to merge
Is there a way when I am merging each page to clip/remove/erase the vertical white space at the end of each pdf so it appears as one seamless document?
UPDATE: Here are some sample .pdf files I am trying to merge. UPDATE 2: USING THE ANSWER:
I have converted @mkl's code to C# and here it is.
The tool class:
/// <summary>
/// Merges the pages of several PDFs into one document as densely as possible.
/// Each source page is cut into horizontal strips at the positions reported by
/// <see cref="PageVerticalAnalyzer"/>, and the strips are stacked top-down on the
/// target pages, separated by a configurable gap.
/// </summary>
public class PdfVeryDenseMergeTool
{
    private readonly Rectangle _pageSize;
    private readonly float _topMargin;
    private readonly float _bottomMargin;
    private readonly float _gap;

    private Document _document = null;
    private PdfWriter _writer = null;

    // Y coordinate on the current target page at which the next strip's top edge is placed.
    private float _yPosition = 0;

    /// <summary>Creates a merge tool for the given target page layout.</summary>
    /// <param name="size">Page size of the target document.</param>
    /// <param name="top">Top margin of the target pages.</param>
    /// <param name="bottom">Bottom margin of the target pages.</param>
    /// <param name="gap">Vertical distance left between two consecutive strips.</param>
    public PdfVeryDenseMergeTool(Rectangle size, float top, float bottom, float gap)
    {
        _pageSize = size;
        _topMargin = top;
        _bottomMargin = bottom;
        _gap = gap;
    } // PdfVeryDenseMergeTool

    /// <summary>
    /// Merges all pages of all <paramref name="inputs"/> into a new document written to
    /// <paramref name="outputStream"/>. The document is closed even if merging fails.
    /// </summary>
    /// <param name="outputStream">Stream receiving the merged PDF.</param>
    /// <param name="inputs">Readers of the source PDFs, in output order.</param>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    public void Merge(Stream outputStream, IEnumerable<PdfReader> inputs)
    {
        // Validate before anything is opened so a bad call cannot leave a half-written document.
        if (outputStream == null)
        {
            throw new ArgumentNullException("outputStream");
        }
        if (inputs == null)
        {
            throw new ArgumentNullException("inputs");
        }

        try
        {
            OpenDocument(outputStream);

            foreach (PdfReader reader in inputs)
            {
                Merge(reader);
            } // foreach of the PDF files to merge
        }
        finally
        {
            CloseDocument();
        } // try-finally
    } // Merge

    /// <summary>Creates and opens the target document on <paramref name="outputStream"/> and starts its first page.</summary>
    public void OpenDocument(Stream outputStream)
    {
        // Left and right margins are fixed at 36; only top and bottom are configurable.
        _document = new Document(_pageSize, 36, 36, _topMargin, _bottomMargin);
        _writer = PdfWriter.GetInstance(_document, outputStream);

        _document.Open();
        NewPage();
    } // OpenDocument

    /// <summary>
    /// Closes the target document, if one is open, and resets the tool's state.
    /// Calling it when no document is open is a no-op.
    /// </summary>
    public void CloseDocument()
    {
        try
        {
            // Guard: this is public and also runs from a finally block, so the document may
            // be missing (never opened, or already closed).
            if (_document != null)
            {
                _document.Close();
            }
        }
        finally
        {
            _document = null;
            _writer = null;
            _yPosition = 0;
        } // try-finally
    } // CloseDocument

    /// <summary>Starts a new target page and moves the insertion position to its top margin.</summary>
    public void NewPage()
    {
        _document.NewPage();
        _yPosition = _pageSize.GetTop(_topMargin);
    } // NewPage

    /// <summary>Merges every page of <paramref name="reader"/> into the open target document.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
    public void Merge(PdfReader reader)
    {
        if (reader == null)
        {
            throw new ArgumentNullException("reader");
        }

        PdfReaderContentParser parser = new PdfReaderContentParser(reader);

        for (int pageIndex = 1; pageIndex <= reader.NumberOfPages; pageIndex++)
        {
            Merge(reader, parser, pageIndex);
        } // foreach of the pages of the current PDF
    } // Merge

    /// <summary>
    /// Merges a single source page. The page's used vertical sections are taken top-down and
    /// as many consecutive sections as fit into the remaining free space are drawn as one
    /// clipped strip; the remainder continues on a new target page.
    /// </summary>
    /// <param name="reader">Reader of the source PDF.</param>
    /// <param name="parser">Content parser created for <paramref name="reader"/>.</param>
    /// <param name="pageIndex">1-based number of the source page.</param>
    /// <exception cref="ArgumentException">
    /// A section of the page does not fit even onto a fresh target page.
    /// </exception>
    public void Merge(PdfReader reader, PdfReaderContentParser parser, int pageIndex)
    {
        PdfImportedPage importedPage = _writer.GetImportedPage(reader, pageIndex);
        PdfContentByte directContent = _writer.DirectContent;

        PageVerticalAnalyzer finder = parser.ProcessContent(pageIndex, new PageVerticalAnalyzer());
        List<float> flips = finder.VerticalFlips;

        // Fewer than two flips means the analyzer found no used section: nothing to copy.
        if (flips.Count < 2)
        {
            return;
        }

        Rectangle pageSizeToImport = reader.GetPageSize(pageIndex);

        // Flips are ascending y values in (start, end) pairs, so the last entry is the
        // upper edge of the topmost used section.
        int startFlip = flips.Count - 1;
        bool first = true;

        while (startFlip > 0)
        {
            if (!first)
            {
                NewPage();
            }

            float freeSpace = _yPosition - _pageSize.GetBottom(_bottomMargin);

            // Extend the strip downwards one section (two flips) at a time while it still fits.
            int endFlip = startFlip + 1;
            while ((endFlip > 1) && (flips[startFlip] - flips[endFlip - 2] < freeSpace))
            {
                endFlip -= 2;
            }

            if (endFlip < startFlip)
            {
                float height = flips[startFlip] - flips[endFlip];

                // Clip to the strip, then draw the whole imported page shifted so that the
                // strip's top edge lands on the current insertion position.
                directContent.SaveState();
                directContent.Rectangle(0, _yPosition - height, pageSizeToImport.Width, height);
                directContent.Clip();
                directContent.NewPath();

                directContent.AddTemplate(importedPage, 0, _yPosition - (flips[startFlip] - pageSizeToImport.Bottom));

                directContent.RestoreState();
                _yPosition -= height + _gap;
                startFlip = endFlip - 1;
            }
            else if (!first)
            {
                // Nothing fitted even on a fresh page.
                throw new ArgumentException(string.Format("Page {0} content too large", pageIndex));
            } // if

            // On the first pass a non-fitting section just triggers a new page and a retry.
            first = false;
        } // while
    } // Merge
} // PdfVeryDenseMergeTool
The RenderListener class:
UPDATE 3: FIXED 1 LINE OF CODE AND IT WORKS: See comment in code
/// <summary>
/// Render listener that records which vertical ranges of a page are covered by text or
/// bitmap images. The result is kept in <see cref="VerticalFlips"/> as an ascending list of
/// y coordinates in pairs: entries at even indices start a used range, entries at odd
/// indices end it.
/// </summary>
public class PageVerticalAnalyzer : IRenderListener
{
    public PageVerticalAnalyzer() { }

    public List<float> VerticalFlips = new List<float>();

    /// <summary>
    /// Marks the vertical interval between <paramref name="from"/> and <paramref name="to"/>
    /// (given in either order) as used, merging it with the intervals it touches.
    /// </summary>
    public void AddVerticalUseSection(float from, float to)
    {
        // Normalize so that low is the lower bound and high the upper one.
        bool reversed = to < from;
        float low = reversed ? to : from;
        float high = reversed ? from : to;

        // Index of the first flip that is not below the lower bound.
        int firstIndex = 0;
        while (firstIndex < VerticalFlips.Count && VerticalFlips[firstIndex] < low)
        {
            firstIndex++;
        }

        // Index of the first flip, at or after firstIndex, that is not below the upper bound.
        // It stays 0 when every existing flip lies below the lower bound.
        int lastIndex = 0;
        if (firstIndex < VerticalFlips.Count)
        {
            lastIndex = firstIndex;
            while (lastIndex < VerticalFlips.Count && VerticalFlips[lastIndex] < high)
            {
                lastIndex++;
            }
        }

        // An even index means the bound falls outside every recorded interval.
        bool lowOutsideInterval = firstIndex % 2 == 0;
        bool highOutsideInterval = lastIndex % 2 == 0;

        // Drop all flips swallowed by the new interval.
        if (lastIndex > firstIndex)
        {
            VerticalFlips.RemoveRange(firstIndex, lastIndex - firstIndex);
        }

        // Insert the upper bound first so the lower bound ends up in front of it.
        if (highOutsideInterval)
        {
            VerticalFlips.Insert(firstIndex, high);
        }
        if (lowOutsideInterval)
        {
            VerticalFlips.Insert(firstIndex, low);
        }
    } // AddVerticalUseSection

    public void BeginTextBlock() { /* Do nothing */ }

    public void EndTextBlock() { /* Do nothing */ }

    /// <summary>Marks the vertical extent of an image, taken from the four transformed corners of the unit square.</summary>
    public void RenderImage(ImageRenderInfo renderInfo)
    {
        Matrix ctm = renderInfo.GetImageCTM();
        List<float> cornerYs = new List<float>(4);

        for (int x = 0; x < 2; x++)
        {
            for (int y = 0; y < 2; y++)
            {
                Vector corner = new Vector(x, y, 1).Cross(ctm);
                cornerYs.Add(corner[Vector.I2]);
            }
        }

        cornerYs.Sort();
        AddVerticalUseSection(cornerYs[0], cornerYs[3]);
    } // RenderImage

    /// <summary>Marks the vertical extent of a text chunk, taken from the end points of its ascent and descent lines.</summary>
    public void RenderText(TextRenderInfo renderInfo)
    {
        LineSegment ascent = renderInfo.GetAscentLine();
        LineSegment descent = renderInfo.GetDescentLine();

        List<float> lineYs = new List<float>(4);
        lineYs.Add(ascent.GetStartPoint()[Vector.I2]);
        lineYs.Add(ascent.GetEndPoint()[Vector.I2]);
        lineYs.Add(descent.GetStartPoint()[Vector.I2]);
        lineYs.Add(descent.GetEndPoint()[Vector.I2]);

        lineYs.Sort();
        AddVerticalUseSection(lineYs[0], lineYs[3]);
    } // RenderText
} // PageVerticalAnalyzer
Code to gather files and run the tool:
/// <summary>
/// Loads the source PDFs, merges them with <see cref="PdfVeryDenseMergeTool"/> onto A4 pages
/// and leaves the merged result in a memory stream.
/// </summary>
public void TestMergeDocuments()
{
    PdfVeryDenseMergeTool tool = new PdfVeryDenseMergeTool(iTextSharp.text.PageSize.A4, 18, 18, 10);
    List<byte[]> sourceDocuments = new List<byte[]>();

    // Code to load each of the 3 files I need into this byte array list

    List<PdfReader> readers = new List<PdfReader>();
    try
    {
        using (MemoryStream ms = new MemoryStream())
        {
            foreach (byte[] ba in sourceDocuments)
            {
                readers.Add(new PdfReader(ba));
            } // foreach of the sets of data

            tool.Merge(ms, readers);

            // Save the file using ms.ToArray(). GetBuffer() would also return the unused
            // tail of the internal buffer and append garbage after the PDF.
        } // using the memory stream
    }
    finally
    {
        // Release the readers whether or not the merge succeeded.
        foreach (PdfReader reader in readers)
        {
            reader.Close();
        }
    }
} // TestMergeDocuments
解决方案
The following sample tool has been implemented along the ideas of the tool
PdfDenseMergeTool
from this answer which the OP has commented to be SO close to what [he] NEEDs. Just like PdfDenseMergeTool
this tool here is implemented in Java/iText which I'm more at home with than C#/iTextSharp. As the OP has already translated PdfDenseMergeTool
to C#/iTextSharp, translating this tool here also should not be too great a problem.
PdfVeryDenseMergeTool
This tool similarly to
PdfDenseMergeTool
takes the page contents of pages from a number of PdfReader
instances and tries to merge them densely, i.e. putting contents of multiple source pages onto a single target page if there is enough free space to do so. In contrast to that earlier tool, this tool even splits source page contents to allow for an even denser merge. Just like that other tool the
PdfVeryDenseMergeTool
does not take vector graphics into account because the iText(Sharp) parsing API only forwards text and bitmap images. The
PdfVeryDenseMergeTool
splits source pages which do not completely fit onto a target page at a horizontal line which is not intersected by the bounding boxes of text glyphs or bitmap graphics. The tool class:
/**
 * Merges the pages of several PDFs into one document as densely as possible. Each source
 * page is cut into horizontal strips at the positions reported by
 * {@link PageVerticalAnalyzer}, and the strips are stacked top-down on the target pages,
 * separated by a configurable gap.
 */
public class PdfVeryDenseMergeTool
{
    /**
     * @param size   page size of the target document
     * @param top    top margin of the target pages
     * @param bottom bottom margin of the target pages
     * @param gap    vertical distance left between two consecutive strips
     */
    public PdfVeryDenseMergeTool(Rectangle size, float top, float bottom, float gap)
    {
        this.pageSize = size;
        this.topMargin = top;
        this.bottomMargin = bottom;
        this.gap = gap;
    }

    /**
     * Merges all pages of all inputs into a new document written to the given stream.
     * The document is closed even if merging fails.
     *
     * @param outputStream stream receiving the merged PDF
     * @param inputs       readers of the source PDFs, in output order
     */
    public void merge(OutputStream outputStream, Iterable<PdfReader> inputs) throws DocumentException, IOException
    {
        try
        {
            openDocument(outputStream);

            for (PdfReader reader : inputs)
            {
                merge(reader);
            }
        }
        finally
        {
            closeDocument();
        }
    }

    /** Creates and opens the target document and starts its first page. */
    void openDocument(OutputStream outputStream) throws DocumentException
    {
        // Left and right margins are fixed at 36; only top and bottom are configurable.
        final Document document = new Document(pageSize, 36, 36, topMargin, bottomMargin);
        final PdfWriter writer = PdfWriter.getInstance(document, outputStream);
        document.open();

        // The fields are only set once the document has been opened successfully.
        this.document = document;
        this.writer = writer;
        newPage();
    }

    /** Closes the target document, if one was opened, and resets the tool's state. */
    void closeDocument()
    {
        try
        {
            // openDocument may have failed before assigning the field; without this guard
            // the resulting NullPointerException would replace the original exception.
            if (document != null)
            {
                document.close();
            }
        }
        finally
        {
            this.document = null;
            this.writer = null;
            this.yPosition = 0;
        }
    }

    /** Starts a new target page and moves the insertion position to its top margin. */
    void newPage()
    {
        document.newPage();
        yPosition = pageSize.getTop(topMargin);
    }

    /** Merges every page of the given reader into the open target document. */
    void merge(PdfReader reader) throws IOException
    {
        PdfReaderContentParser parser = new PdfReaderContentParser(reader);

        for (int page = 1; page <= reader.getNumberOfPages(); page++)
        {
            merge(reader, parser, page);
        }
    }

    /**
     * Merges a single source page. The page's used vertical sections are taken top-down and
     * as many consecutive sections as fit into the remaining free space are drawn as one
     * clipped strip; the remainder continues on a new target page.
     *
     * @param reader reader of the source PDF
     * @param parser content parser created for {@code reader}
     * @param page   1-based number of the source page
     * @throws IllegalArgumentException if a section does not fit even onto a fresh target page
     */
    void merge(PdfReader reader, PdfReaderContentParser parser, int page) throws IOException
    {
        PdfImportedPage importedPage = writer.getImportedPage(reader, page);
        PdfContentByte directContent = writer.getDirectContent();

        PageVerticalAnalyzer finder = parser.processContent(page, new PageVerticalAnalyzer());

        // Fewer than two flips means the analyzer found no used section: nothing to copy.
        if (finder.verticalFlips.size() < 2)
        {
            return;
        }

        Rectangle pageSizeToImport = reader.getPageSize(page);

        // Flips are ascending y values in (start, end) pairs, so the last entry is the
        // upper edge of the topmost used section.
        int startFlip = finder.verticalFlips.size() - 1;
        boolean first = true;

        while (startFlip > 0)
        {
            if (!first)
            {
                newPage();
            }

            float freeSpace = yPosition - pageSize.getBottom(bottomMargin);

            // Extend the strip downwards one section (two flips) at a time while it still fits.
            int endFlip = startFlip + 1;
            while ((endFlip > 1) && (finder.verticalFlips.get(startFlip) - finder.verticalFlips.get(endFlip - 2) < freeSpace))
            {
                endFlip -= 2;
            }

            if (endFlip < startFlip)
            {
                float height = finder.verticalFlips.get(startFlip) - finder.verticalFlips.get(endFlip);

                // Clip to the strip, then draw the whole imported page shifted so that the
                // strip's top edge lands on the current insertion position.
                directContent.saveState();
                directContent.rectangle(0, yPosition - height, pageSizeToImport.getWidth(), height);
                directContent.clip();
                directContent.newPath();

                directContent.addTemplate(importedPage, 0, yPosition - (finder.verticalFlips.get(startFlip) - pageSizeToImport.getBottom()));

                directContent.restoreState();
                yPosition -= height + gap;
                startFlip = endFlip - 1;
            }
            else if (!first)
            {
                // Nothing fitted even on a fresh page.
                throw new IllegalArgumentException(String.format("Page %s content sections too large.", page));
            }

            // On the first pass a non-fitting section just triggers a new page and a retry.
            first = false;
        }
    }

    Document document = null;
    PdfWriter writer = null;

    // Y coordinate on the current target page at which the next strip's top edge is placed.
    float yPosition = 0;

    final Rectangle pageSize;
    final float topMargin;
    final float bottomMargin;
    final float gap;
}
This tool makes use of a custom
RenderListener
for use with the iText parser API:

/**
 * Render listener that records which vertical ranges of a page are covered by text or
 * bitmap images. The result is kept in {@code verticalFlips} as an ascending list of
 * y coordinates in pairs: entries at even indices start a used range, entries at odd
 * indices end it.
 */
public class PageVerticalAnalyzer implements RenderListener
{
    @Override
    public void beginTextBlock() { }

    @Override
    public void endTextBlock() { }

    /*
     * Marks the vertical extent of a text chunk, taken from the end points of its ascent
     * and descent lines.
     *
     * @see RenderListener#renderText(TextRenderInfo)
     */
    @Override
    public void renderText(TextRenderInfo renderInfo)
    {
        LineSegment ascent = renderInfo.getAscentLine();
        LineSegment descent = renderInfo.getDescentLine();

        float[] lineYs = new float[4];
        lineYs[0] = ascent.getStartPoint().get(Vector.I2);
        lineYs[1] = ascent.getEndPoint().get(Vector.I2);
        lineYs[2] = descent.getStartPoint().get(Vector.I2);
        lineYs[3] = descent.getEndPoint().get(Vector.I2);

        Arrays.sort(lineYs);
        addVerticalUseSection(lineYs[0], lineYs[3]);
    }

    /*
     * Marks the vertical extent of an image, taken from the four transformed corners of
     * the unit square.
     *
     * @see RenderListener#renderImage(ImageRenderInfo)
     */
    @Override
    public void renderImage(ImageRenderInfo renderInfo)
    {
        Matrix ctm = renderInfo.getImageCTM();
        float[] cornerYs = new float[4];
        int slot = 0;

        for (int x = 0; x < 2; x++)
        {
            for (int y = 0; y < 2; y++)
            {
                Vector corner = new Vector(x, y, 1).cross(ctm);
                cornerYs[slot++] = corner.get(Vector.I2);
            }
        }

        Arrays.sort(cornerYs);
        addVerticalUseSection(cornerYs[0], cornerYs[3]);
    }

    /**
     * This method marks the given interval (bounds in either order) as used, merging it
     * with the intervals it touches.
     */
    void addVerticalUseSection(float from, float to)
    {
        // Normalize so that low is the lower bound and high the upper one.
        final boolean reversed = to < from;
        final float low = reversed ? to : from;
        final float high = reversed ? from : to;

        // Index of the first flip that is not below the lower bound.
        int firstIndex = 0;
        while (firstIndex < verticalFlips.size() && verticalFlips.get(firstIndex) < low)
        {
            firstIndex++;
        }

        // Index of the first flip, at or after firstIndex, that is not below the upper bound.
        // It stays 0 when every existing flip lies below the lower bound.
        int lastIndex = 0;
        if (firstIndex < verticalFlips.size())
        {
            lastIndex = firstIndex;
            while (lastIndex < verticalFlips.size() && verticalFlips.get(lastIndex) < high)
            {
                lastIndex++;
            }
        }

        // An even index means the bound falls outside every recorded interval.
        boolean lowOutsideInterval = firstIndex % 2 == 0;
        boolean highOutsideInterval = lastIndex % 2 == 0;

        // Drop all flips swallowed by the new interval.
        if (lastIndex > firstIndex)
        {
            verticalFlips.subList(firstIndex, lastIndex).clear();
        }

        // Insert the upper bound first so the lower bound ends up in front of it.
        if (highOutsideInterval)
        {
            verticalFlips.add(firstIndex, high);
        }
        if (lowOutsideInterval)
        {
            verticalFlips.add(firstIndex, low);
        }
    }

    final List<Float> verticalFlips = new ArrayList<Float>();
}
It is used like this:
// Merge onto A4 target pages; the constructor arguments are
// (page size, top margin, bottom margin, gap between merged strips).
PdfVeryDenseMergeTool tool = new PdfVeryDenseMergeTool(PageSize.A4, 18, 18, 5);
// output is the target stream and inputs the PdfReader instances to merge, in order.
tool.merge(output, inputs);
Applied to the OP's sample documents
Header.pdf
Body.pdf
Footer.pdf
it generates
If one defines the target document page size to be A5 landscape:
// Same merge, but onto a 595 x 421 target page (A5 landscape, per the surrounding text).
PdfVeryDenseMergeTool tool = new PdfVeryDenseMergeTool(new RectangleReadOnly(595,421), 18, 18, 5);
tool.merge(output, inputs);
it generates this:
Beware! This is only a proof of concept and it does not consider all possibilities. E.g. the case of source or target pages with a non-trivial Rotate value is not properly handled. Thus, it is not ready for production use yet.
Improvement in current (5.5.6 SNAPSHOT) iText version
The current iText development version towards 5.5.6 enhances the parser functionality to also signal vector graphics. Thus, I extended the
PageVerticalAnalyzer
to make use of this:

/*
 * Excerpt of the extended analyzer: in addition to text and images it records the vertical
 * extent of painted vector paths. Parts elided by the author are marked with "...".
 */
public class PageVerticalAnalyzer implements ExtRenderListener
{
    @Override
    public void beginTextBlock() { }

    @Override
    public void endTextBlock() { }

    // Clip path changes are ignored.
    @Override
    public void clipPath(int rule) { }

    ...

    /*
     * Vertical extent (lowest and highest transformed y) of one subpath.
     */
    static class SubPathSection
    {
        // Starts the section at a single point, so both bounds are that point's transformed y.
        public SubPathSection(float x, float y, Matrix m)
        {
            float effectiveY = getTransformedY(x, y, m);
            pathFromY = effectiveY;
            pathToY = effectiveY;
        }

        // Widens the section, if necessary, so that it includes the given point.
        void extendTo(float x, float y, Matrix m)
        {
            float effectiveY = getTransformedY(x, y, m);
            if (effectiveY < pathFromY)
                pathFromY = effectiveY;
            else if (effectiveY > pathToY)
                pathToY = effectiveY;
        }

        // Applies the matrix to the point (x, y) and returns the resulting y coordinate.
        float getTransformedY(float x, float y, Matrix m)
        {
            return new Vector(x, y, 1).cross(m).get(Vector.I2);
        }

        float getFromY()
        {
            return pathFromY;
        }

        float getToY()
        {
            return pathToY;
        }

        private float pathFromY;
        private float pathToY;
    }

    /*
     * Beware: The implementation is not correct as it includes the control points of curves
     * which may be far outside the actual curve.
     *
     * Collects the vertical extent of the path under construction, one SubPathSection
     * per subpath, in the path list.
     *
     * @see ExtRenderListener#modifyPath(PathConstructionRenderInfo)
     */
    @Override
    public void modifyPath(PathConstructionRenderInfo renderInfo)
    {
        Matrix ctm = renderInfo.getCtm();
        List<Float> segmentData = renderInfo.getSegmentData();

        switch (renderInfo.getOperation())
        {
        case PathConstructionRenderInfo.MOVETO:
            // Forget the current subpath, then fall through (no break) so the target point
            // starts a new section in the loop below.
            subPath = null;
        case PathConstructionRenderInfo.LINETO:
        case PathConstructionRenderInfo.CURVE_123:
        case PathConstructionRenderInfo.CURVE_13:
        case PathConstructionRenderInfo.CURVE_23:
            // segmentData is read as consecutive (x, y) pairs; every pair, including curve
            // control points, extends the current section.
            for (int i = 0; i < segmentData.size()-1; i+=2)
            {
                if (subPath == null)
                {
                    subPath = new SubPathSection(segmentData.get(i), segmentData.get(i+1), ctm);
                    path.add(subPath);
                }
                else
                    subPath.extendTo(segmentData.get(i), segmentData.get(i+1), ctm);
            }
            break;
        case PathConstructionRenderInfo.RECT:
            // segmentData is read as x, y, width, height; all four corners are included.
            float x = segmentData.get(0);
            float y = segmentData.get(1);
            float w = segmentData.get(2);
            float h = segmentData.get(3);
            SubPathSection section = new SubPathSection(x, y, ctm);
            section.extendTo(x+w, y, ctm);
            section.extendTo(x, y+h, ctm);
            section.extendTo(x+w, y+h, ctm);
            path.add(section);
            // No break: falls through to CLOSE, which resets the current subpath.
            // NOTE(review): presumably intentional - confirm.
        case PathConstructionRenderInfo.CLOSE:
            subPath = null;
            break;
        default:
        }
    }

    /*
     * Marks every collected section as used unless the operation is NO_OP, then
     * discards the collected path data. Always returns null.
     *
     * @see ExtRenderListener#renderPath(PathPaintingRenderInfo)
     */
    @Override
    public Path renderPath(PathPaintingRenderInfo renderInfo)
    {
        if (renderInfo.getOperation() != PathPaintingRenderInfo.NO_OP)
        {
            for (SubPathSection section : path)
                addVerticalUseSection(section.getFromY(), section.getToY());
        }

        path.clear();
        subPath = null;
        return null;
    }

    // Sections of the path currently under construction; cleared by renderPath.
    List<SubPathSection> path = new ArrayList<SubPathSection>();
    // Section of the subpath currently being extended, or null if the next point starts a new one.
    SubPathSection subPath = null;

    ...
}
A simple test (VeryDenseMerging.java method
testMergeOnlyGraphics
) merges these files into this:
But once again beware: this is a mere proof of concept. Especially
modifyPath()
needs to be improved, the implementation is not correct as it includes the control points of curves which may be far outside the actual curve.这篇关于如何在合并删除空白的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!